luisrui committed on
Commit
c330598
·
1 Parent(s): c129f53

Deploy ModelLens v1: BYOK OpenAI key, size filter, official-only filter, 47k HF model pool

Browse files
README.md CHANGED
@@ -1,15 +1,83 @@
1
  ---
2
  title: ModelLens
3
- emoji: 📊
4
- colorFrom: purple
5
- colorTo: blue
6
  sdk: gradio
7
- sdk_version: 6.14.0
8
- python_version: '3.13'
9
  app_file: app.py
10
  pinned: false
11
  license: mit
12
- short_description: 'MODELLENS: Finding the Best for Your Task!'
13
  ---
14
 
15
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  title: ModelLens
3
+ emoji: 🔭
4
+ colorFrom: indigo
5
+ colorTo: pink
6
  sdk: gradio
7
+ sdk_version: 4.44.0
 
8
  app_file: app.py
9
  pinned: false
10
  license: mit
11
+ short_description: Finding the Best Model for Your Task from Myriads of Models
12
  ---
13
 
14
+ # ModelLens — Finding the Best Model for Your Task from Myriads of Models
15
+
16
+ Describe your dataset → pick a task and metric → get a ranked list of HuggingFace
17
+ models likely to perform well on it. Backed by the `MLPMetric` (ablation_no_id)
18
+ checkpoint trained on the `unified_augmented` corpus, with a candidate pool of
19
+ ~47k HuggingFace models.
20
+
21
+ ## How it works
22
+
23
+ 1. Your dataset description is embedded with OpenAI `text-embedding-3-small`
24
+ (1536-dim, the same encoder used during training).
25
+ 2. The MLPMetric scores every candidate model conditioned on the embedding +
26
+ chosen task + chosen metric.
27
+ 3. We return the top-k, optionally filtered by parameter count, "official
28
+ pretrained only", or "HuggingFace-hosted only".
29
+
30
+ ## Bring your own OpenAI key
31
+
32
+ This Space does **not** ship with a baked-in OpenAI key. Paste your own
33
+ `sk-...` key into the "OpenAI API key" field — it is sent directly to OpenAI
34
+ for that single request and is **not stored, logged, or reused** by this Space.
35
+ A query costs roughly **$0.000001** on your account (about a millionth of a
36
+ dollar).
37
+
38
+ If you don't have a key yet: https://platform.openai.com/api-keys
39
+
40
+ ## Files in this Space
41
+
42
+ ```
43
+ app.py Gradio entry point
44
+ recommend.py Recommender (loads checkpoint + model pool, embeds dataset desc)
45
+ inference_lib.py Self-contained MLPMetric implementation (no module/ tree needed)
46
+ build_model_pool.py Offline helper to (re)build assets/model_pool.npz
47
+ requirements.txt Pinned deps
48
+ assets/
49
+ model_pool.npz Pre-computed candidate pool (47k models, size+family ids, popularity, HF urls)
50
+ checkpoint/
51
+ MLPMetric.pt ~37 MB trained weights
52
+ args.json Training-time hyperparameters (model dims, num_*)
53
+ data/
54
+ task2id.json Task vocab
55
+ metric2id.json Metric vocab
56
+ ```
57
+
58
+ The Space looks for the checkpoint at `checkpoint/MLPMetric.pt` and the data
59
+ JSONs at `data/`. Override with env vars `MODEL_CKPT`, `MODEL_ARGS`, `DATA_DIR`,
60
+ `POOL_PATH` if you lay things out differently.
61
+
62
+ ## Running locally
63
+
64
+ ```bash
65
+ cd web
66
+ pip install -r requirements.txt
67
+ # either set OPENAI_API_KEY in env, or paste it into the UI at runtime
68
+ python app.py
69
+ # open http://localhost:7860
70
+ ```
71
+
72
+ ## Rebuilding the model pool
73
+
74
+ When you bump the candidate set (e.g. add new HF models to `model2id.json` /
75
+ `model_profile.json`):
76
+
77
+ ```bash
78
+ python web/build_model_pool.py \
79
+ --data-dir data/unified_augmented \
80
+ --args checkpoint/mlp/unified_augmented/ablation_no_model_id_no_dataset_id/args.json \
81
+ --out web/assets/model_pool.npz \
82
+ --min-popularity 0
83
+ ```
app.py ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Gradio app entry point for HuggingFace Spaces.
2
+
3
+ Run locally:
4
+ cd web && python app.py
5
+ Deploy to HF Spaces:
6
+ Push the contents of ``web/`` (plus ``assets/model_pool.npz`` and the
7
+ checkpoint at ``checkpoint/...``) to a new Space with sdk=gradio.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import os
12
+ import traceback
13
+
14
+ import gradio as gr
15
+ import pandas as pd
16
+
17
+ from recommend import default_recommender
18
+
19
+
20
# Build the recommender eagerly at import so the first request does not pay the load cost.
print("Loading recommender ...")
RECOMMENDER = default_recommender()
print(f"Loaded recommender: {len(RECOMMENDER.model_names)} candidate models, "
      f"{len(RECOMMENDER.task2id)} tasks, {len(RECOMMENDER.metric2id)} metrics.")

# Task dropdown entries, ordered alphabetically without regard to case.
TASK_CHOICES = sorted(RECOMMENDER.task2id.keys(), key=str.lower)

# The full metric vocab is large and noisy, so the dropdown only offers
# a curated list of common bare metric names.
COMMON_METRICS = [
    "accuracy",
    "f1",
    "exact_match",
    "rouge_l",
    "bleu",
    "mean_iou",
    "mean_average_precision",
    "top_1_accuracy",
    "top_5_accuracy",
    "perplexity",
    "wer",
    "auc",
    "spearman",
    "pearson",
    "mse",
    "rmse",
    "mc2",
    "accuracy_norm",
    "strict_accuracy",
]

# Keep only the curated names that the recommender resolves to something
# other than its "unknown metric" id.
_resolvable_metrics = [
    name
    for name in COMMON_METRICS
    if RECOMMENDER.resolve_metric(name) != RECOMMENDER.model.unknown_metric_id
]
METRIC_CHOICES = sorted(set(_resolvable_metrics))
if not METRIC_CHOICES:
    # Nothing resolved: fall back to the curated list so the dropdown is never empty.
    METRIC_CHOICES = COMMON_METRICS


# One-click sample inputs for the "Dataset description" box.
EXAMPLE_DESCRIPTIONS = [
    (
        "MMLU is a multiple-choice benchmark covering 57 academic subjects, "
        "evaluating broad knowledge and reasoning ability across humanities, "
        "STEM, and social sciences."
    ),
    (
        "GSM8K is a dataset of 8.5K high-quality grade-school math word problems "
        "requiring multi-step arithmetic reasoning to arrive at a single numerical answer."
    ),
    (
        "ImageNet-1K contains roughly 1.28M natural images labeled with one of 1000 "
        "fine-grained object categories, widely used for image classification benchmarking."
    ),
    (
        "CoNLL 2003 is an English named-entity recognition corpus annotating persons, "
        "organizations, locations, and miscellaneous entities in news wire text."
    ),
]
49
+
50
+
51
def _format_size(size_b: float) -> str:
    """Pretty-print a parameter count given in billions.

    Returns e.g. '7.0B', '350M' or '1K'; returns '—' when the size is
    ``None``, NaN, zero or negative (i.e. unknown).

    Values that would round up to 1000 of a unit are promoted to the next
    unit, so 0.9996 renders as '1.0B' rather than '1000M'.
    """
    # `size_b != size_b` is True only for NaN.
    if size_b is None or size_b != size_b or size_b <= 0:
        return "—"
    if size_b >= 1.0:
        return f"{size_b:.1f}B"
    if size_b >= 0.001:
        millions = size_b * 1000
        # round() and the ':.0f' format use the same rounding rule, so this
        # catches exactly the inputs that would have printed "1000M".
        if round(millions) >= 1000:
            return f"{size_b:.1f}B"
        return f"{millions:.0f}M"
    thousands = size_b * 1_000_000
    if round(thousands) >= 1000:
        return "1M"
    return f"{thousands:.0f}K"
60
+
61
+
62
_RESULT_COLUMNS = ["rank", "model", "score", "size", "popularity", "link"]


def _empty_results():
    """Return an empty results frame that still carries the table's column headers."""
    return pd.DataFrame(columns=_RESULT_COLUMNS)


def _size_bound(value):
    """Turn a size-filter input into a positive float, or None for 0 / blank ("no limit")."""
    if not value:
        return None
    bound = float(value)
    return bound if bound > 0 else None


def recommend_ui(dataset_description: str, task: str, metric: str, top_k: int,
                 min_size: float, max_size: float, official_only: bool, hf_only: bool,
                 api_key: str):
    """Validate the form inputs, run the recommender, and format the results.

    Returns a ``(DataFrame, status_message)`` pair. Every path, including
    validation failures and errors, returns a frame with the same six
    columns, so the results table keeps its headers.

    Validation happens before the recommender is called:
      * a blank description is rejected;
      * an API key must come from the form or the ``OPENAI_API_KEY`` env var;
      * when both size bounds are set, min must not exceed max.

    A ``ValueError`` from the recommender is shown as a short warning; any
    other exception is shown with its traceback.
    """
    if not (dataset_description or "").strip():
        return _empty_results(), "Please enter a dataset description."

    api_key = (api_key or "").strip()
    if not api_key and not os.environ.get("OPENAI_API_KEY"):
        return _empty_results(), (
            "⚠️ Please paste your OpenAI API key in the field above. "
            "We use it once per request to embed your dataset description; "
            "the key is **not stored or logged** by this app."
        )

    # 0 / blank means "no limit" on that side.
    min_b = _size_bound(min_size)
    max_b = _size_bound(max_size)
    if min_b is not None and max_b is not None and min_b > max_b:
        return _empty_results(), "⚠️ Min size must be ≤ max size."

    try:
        recs = RECOMMENDER.recommend(
            dataset_description=dataset_description,
            task=task,
            metric=metric,
            top_k=int(top_k),
            popularity_weight=0.0,
            hf_only=bool(hf_only),
            min_size_b=min_b,
            max_size_b=max_b,
            official_only=bool(official_only),
            # An empty form field becomes None rather than an empty string.
            api_key=api_key or None,
        )
    except ValueError as e:
        return _empty_results(), f"⚠️ {e}"
    except Exception:
        # UI boundary: surface the failure in the status area instead of crashing the handler.
        return _empty_results(), f"⚠️ Internal error:\n```\n{traceback.format_exc()}\n```"

    rows = [
        {
            "rank": r.rank,
            "model": r.model_name,
            "score": round(r.score, 4),
            "size": _format_size(r.size_b),
            "popularity": r.popularity,
            # The table's "link" column is rendered as markdown.
            "link": f"[link]({r.hf_url})" if r.hf_url else "—",
        }
        for r in recs
    ]
    df = pd.DataFrame(rows, columns=_RESULT_COLUMNS)
    return df, f"Returned top-{len(rows)} of {len(RECOMMENDER.model_names)} candidates."
114
+
115
+
116
# Dropdown defaults. Both fall back to None when the corresponding choice
# list is empty, so building the UI never raises IndexError.
if "Question Answering" in TASK_CHOICES:
    _DEFAULT_TASK = "Question Answering"
else:
    _DEFAULT_TASK = TASK_CHOICES[0] if TASK_CHOICES else None

if "accuracy" in METRIC_CHOICES:
    _DEFAULT_METRIC = "accuracy"
else:
    _DEFAULT_METRIC = METRIC_CHOICES[0] if METRIC_CHOICES else None


# Page layout: intro text on top, then an input column (left) and a results
# column (right). NOTE(review): nesting was reconstructed from a flattened
# diff view — confirm the row/column grouping against the deployed app.
with gr.Blocks(title="ModelLens · Finding the Best Model for Your Task", theme=gr.themes.Soft()) as demo:
    # NOTE(review): relies on gr.Markdown stripping the common leading
    # indentation from this literal — confirm for the pinned gradio version.
    gr.Markdown(
        """
        # ModelLens: Finding the Best Model for Your Task from Myriads of Models
        Describe your dataset, pick a task type and a metric, and ModelLens returns
        the top candidates from a pool of **47k+** HuggingFace models. Backed by the
        ablation_no_id MLPMetric checkpoint trained on `unified_augmented`.

        > **BYO OpenAI key.** This Space embeds your dataset description with
        > `text-embedding-3-small`. You provide your own key in the field below
        > — it is sent directly to OpenAI for that single request and is never
        > stored, logged, or reused by this app. A query costs roughly
        > **$0.000001** on your account.
        """
    )
    with gr.Row():
        with gr.Column(scale=2):
            desc = gr.Textbox(
                label="Dataset description",
                placeholder="Describe your dataset in 2-3 sentences. The more specific, the better.",
                lines=5,
            )
            with gr.Row():
                task = gr.Dropdown(
                    choices=TASK_CHOICES,
                    label="Task type",
                    value=_DEFAULT_TASK,
                    filterable=True,
                )
                metric = gr.Dropdown(
                    choices=METRIC_CHOICES,
                    label="Metric (optional)",
                    value=_DEFAULT_METRIC,
                    filterable=True,
                    # Users may type a metric that is not in the curated list.
                    allow_custom_value=True,
                )
            top_k = gr.Slider(5, 100, value=20, step=5, label="Top-k")
            api_key = gr.Textbox(
                label="OpenAI API key (sk-...)",
                placeholder="Paste your key — used once per request, never stored or logged.",
                type="password",
                lines=1,
            )
            with gr.Row():
                min_size = gr.Number(
                    value=0, label="Min size (B params, 0 = no min)",
                    minimum=0, precision=2,
                )
                max_size = gr.Number(
                    value=0, label="Max size (B params, 0 = no max)",
                    minimum=0, precision=2,
                )
            official_only = gr.Checkbox(
                value=False,
                label="Only recommend official pretrained models (DeepSeek, Qwen, Llama, gpt-oss, Mistral, Gemma, Phi, ...)",
            )
            hf_only = gr.Checkbox(
                value=True,
                label="Only show models hosted on HuggingFace (drops paper baselines like 'inceptionv4')",
            )
            run_btn = gr.Button("Recommend", variant="primary")
            # Examples only fill the description box; they do not run a query.
            gr.Examples(
                examples=[[d] for d in EXAMPLE_DESCRIPTIONS],
                inputs=[desc],
                outputs=[],
                label="Example dataset descriptions (click to fill, then press Recommend)",
                run_on_click=False,
            )
        with gr.Column(scale=3):
            status = gr.Markdown("")
            table = gr.Dataframe(
                headers=["rank", "model", "score", "size", "popularity", "link"],
                interactive=False,
                wrap=True,
                datatype=["number", "str", "number", "str", "number", "markdown"],
            )

    # Input order must match the positional parameters of recommend_ui.
    run_btn.click(
        recommend_ui,
        inputs=[desc, task, metric, top_k, min_size, max_size, official_only, hf_only, api_key],
        outputs=[table, status],
    )
195
+
196
if __name__ == "__main__":
    # Host and port can be overridden through the environment; the defaults
    # bind all interfaces on port 7860.
    host = os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0")
    port = int(os.environ.get("GRADIO_SERVER_PORT", 7860))

    # Queue requests (at most 16 waiting) before starting the server.
    queued_app = demo.queue(max_size=16)
    queued_app.launch(server_name=host, server_port=port, share=False)
assets/model_pool.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66552520f9534fce6e4a530fe9ba55f8cf046d0c68ee0197eca02a988425c855
3
+ size 5820984
build_model_pool.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Build the candidate model pool consumed by the recommendation web app.
2
+
3
+ The output is a single .npz that bundles, for every candidate model:
4
+ - model_name (str)
5
+ - size_id (int, bucket id matching the trained MLPMetric)
6
+ - family_id (int)
7
+ - popularity (int, HF downloads in the last 30d; 0 if unknown)
8
+ - hf_url (str, https://huggingface.co/<name> if name looks like a repo id)
9
+
10
+ Run from the project root:
11
+ python web/build_model_pool.py \
12
+ --data-dir data/unified_augmented \
13
+ --args checkpoint/mlp/unified_augmented/ablation_no_model_id_no_dataset_id/args.json \
14
+ --out web/assets/model_pool.npz
15
+ """
16
+ from __future__ import annotations
17
+
18
+ import argparse
19
+ import json
20
+ import os
21
+ import numpy as np
22
+
23
+
24
# Fallback size-bucket edges (same unit as the profile "size" field), used
# when the training args.json is missing or has no "size_bucket" entry.
SIZE_EDGES_DEFAULT = [
    0.001, 0.003, 0.01, 0.03, 0.06, 0.1, 0.15,
    0.2, 0.3, 0.4, 0.5, 0.6, 0.8,
    1, 3, 7, 14, 35, 70, 100, 1000,
]
28
+
29
+
30
def assign_size_bucket(size_b: float, size_edges: np.ndarray, unknown_id: int) -> int:
    """Map a model size to its bucket index within ``size_edges``.

    Args:
        size_b: Model size; anything ``float()`` cannot convert counts as unknown.
        size_edges: Ascending bucket edges.
        unknown_id: Id returned when the size is missing or invalid.

    Returns:
        ``unknown_id`` for unparseable, NaN, infinite, zero or negative sizes;
        otherwise the number of edges that are ``<= size_b`` (an int in
        ``0..len(size_edges)``).
    """
    try:
        x = float(size_b)
    except (TypeError, ValueError):
        return unknown_id
    # A non-positive size is not a real measurement. Previously only an exact
    # 0.0 was rejected, so negative values landed in the smallest bucket.
    # NOTE(review): confirm the training-time bucketing treats negatives the same way.
    if not np.isfinite(x) or x <= 0.0:
        return unknown_id
    return int(np.searchsorted(size_edges, x, side="right"))
38
+
39
+
40
def get_size_b(profile_entry) -> float:
    """Extract the ``"size"`` field of a model-profile entry as a float.

    Returns NaN (meaning "unknown") when the entry is not a dict, the field
    is missing, is the string "unknown" (any case, surrounding whitespace
    ignored), cannot be converted to a float, or converts to a value that is
    not a finite positive number.
    """
    unknown = float("nan")
    if not isinstance(profile_entry, dict):
        return unknown
    size = profile_entry.get("size")
    if isinstance(size, str) and size.strip().lower() == "unknown":
        return unknown
    try:
        x = float(size)
    except (TypeError, ValueError, OverflowError):
        # None, non-numeric strings, containers, or ints too large for a float.
        return unknown
    # Zero, negative, NaN and infinite values are not usable sizes.
    if not np.isfinite(x) or x <= 0.0:
        return unknown
    return x
51
+
52
+
53
def hf_url_for(name: str) -> str:
    """Return the HuggingFace URL for ``name`` if it looks like a repo id, else "".

    A name is treated as a repo id only when it has the ``<owner>/<repo>``
    shape: exactly one "/", both sides non-empty, and no whitespace. A bare
    "/" test also matched free-text names such as "ResNet-50 / ImageNet" and
    produced dead links.
    """
    owner, sep, repo = name.partition("/")
    if not (sep and owner and repo):
        return ""
    if "/" in repo or any(ch.isspace() for ch in name):
        return ""
    return f"https://huggingface.co/{name}"
55
+
56
+
57
def _read_json(path):
    """Parse the UTF-8 JSON file at ``path``, closing the handle when done."""
    with open(path, encoding="utf-8") as fh:
        return json.load(fh)


def main(argv=None):
    """Build the candidate-pool archive from the JSON files in ``--data-dir``.

    Reads ``model2id.json``, ``model2family.json``, ``family2id.json`` and
    ``model_profile.json`` (required) plus ``model_popularity.json``
    (optional), then saves one ``np.savez`` archive with the arrays
    ``names``, ``size_ids``, ``sizes_b``, ``family_ids``, ``popularities``
    and ``urls``.

    Args:
        argv: Argument list for argparse; ``None`` means ``sys.argv[1:]``.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--data-dir", default="data/unified_augmented")
    p.add_argument(
        "--args",
        default="checkpoint/mlp/unified_augmented/ablation_no_model_id_no_dataset_id/args.json",
        help="Path to the training args.json — used to read size_bucket so bucket ids align with the checkpoint.",
    )
    p.add_argument("--out", default="web/assets/model_pool.npz")
    p.add_argument(
        "--min-popularity",
        type=int,
        default=0,
        help="Drop candidate models with HF download count below this. 0 keeps all.",
    )
    args = p.parse_args(argv)

    # A bare file name has no directory part, and os.makedirs("") would raise.
    out_dir = os.path.dirname(args.out)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    model2id = _read_json(os.path.join(args.data_dir, "model2id.json"))
    model2family = _read_json(os.path.join(args.data_dir, "model2family.json"))
    family2id = _read_json(os.path.join(args.data_dir, "family2id.json"))
    model_profile = _read_json(os.path.join(args.data_dir, "model_profile.json"))

    # Popularity is optional: models without an entry default to 0 downloads.
    pop_path = os.path.join(args.data_dir, "model_popularity.json")
    pop_map = {}
    if os.path.exists(pop_path):
        pop_doc = _read_json(pop_path)
        # Doc shape: {fetched_at, source, num_models, status_counts, models: {name: {downloads, status}}}
        # A flat {name: count} document is also accepted.
        models_field = pop_doc.get("models", pop_doc)
        for name, entry in models_field.items():
            if isinstance(entry, dict):
                pop_map[name] = int(entry.get("downloads", 0) or 0)
            else:
                try:
                    pop_map[name] = int(entry)
                except (TypeError, ValueError, OverflowError):
                    pop_map[name] = 0

    # Prefer the bucket edges recorded at training time, when available.
    if os.path.exists(args.args):
        train_args = _read_json(args.args)
        size_edges = np.array(train_args.get("size_bucket", SIZE_EDGES_DEFAULT), dtype=float)
    else:
        size_edges = np.array(SIZE_EDGES_DEFAULT, dtype=float)
    # Bucket ids from the edges run 0..len(size_edges); the unknown id sits one past that.
    unknown_size_id = len(size_edges) + 1

    unknown_family_id = family2id.get("unknown", len(family2id) - 1)

    names = []
    size_ids = []
    sizes_b = []
    family_ids = []
    popularities = []
    urls = []
    dropped_pop = 0
    for name in model2id:
        pop = pop_map.get(name, 0)
        if pop < args.min_popularity:
            dropped_pop += 1
            continue
        size_b = get_size_b(model_profile.get(name))
        fam = model2family.get(name, "unknown")
        names.append(name)
        size_ids.append(assign_size_bucket(size_b, size_edges, unknown_size_id))
        sizes_b.append(size_b)  # NaN means unknown
        family_ids.append(family2id.get(fam, unknown_family_id))
        popularities.append(pop)
        urls.append(hf_url_for(name))

    np.savez(
        args.out,
        names=np.array(names, dtype=object),
        size_ids=np.array(size_ids, dtype=np.int64),
        sizes_b=np.array(sizes_b, dtype=np.float32),
        family_ids=np.array(family_ids, dtype=np.int64),
        popularities=np.array(popularities, dtype=np.int64),
        urls=np.array(urls, dtype=object),
    )
    print(f"Wrote {len(names)} models to {args.out} (dropped {dropped_pop} below min-popularity={args.min_popularity})")
    print(f"  unique families: {len(set(family_ids))}, unique size buckets: {len(set(size_ids))}")
    print(f"  models with HF URL: {sum(1 for u in urls if u)} / {len(urls)}")
150
+
151
+
152
# Script entry point: build the pool using the command-line arguments.
if __name__ == "__main__":
    main(None)
checkpoint/MLPMetric.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da6f25ad9d9052a92d345b770099f029cba0b42f5b9923ccc97b06353be50d6b
3
+ size 38506845
checkpoint/args.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"device": "cuda:0", "use_data_parallel": false, "device_ids": [0, 1, 2, 3], "use_ddp": true, "ddp_find_unused_parameters": false, "num_workers": 0, "pin_memory": false, "persistent_workers": false, "data_name": "unified_augmented", "ood_split_mode": "new_dataset_evaluation", "seed": 2025, "use_wandb": true, "wandb_project": "ModelProfile", "wandb_entity": "ruicai-ucdavis", "trail_name": "ablation_no_model_id_no_dataset_id", "start_epoch": 0, "checkpoint_path": "", "is_train": true, "is_ood": true, "loss_type": "ensemble", "point_loss_weight": 0.1, "early_stop": 20, "num_epochs": 1000, "batch_size": 8, "pair_batch_size": 1024, "learning_rate": 0.001, "weight_decay": 0.0001, "tau": 10.0, "lambda_list": 0.5, "lambda_pair": 1.0, "alpha": 3.0, "size_bucket": [0.001, 0.003, 0.01, 0.03, 0.06, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.8, 1, 3, 7, 14, 35, 70, 100, 1000], "use_id_emb": false, "model_dim": 1536, "token_dim": 512, "use_size_prior": true, "size_dim": 64, "use_family_prior": true, "family_dim": 64, "dataset_desp_dim": 1536, "task_dim": 256, "model_name": "MLPMetric", "hidden_dim": 512, "dropout_rate": 0.02, "topk": [1, 3, 5, 7, 10, 30, 50, 70, 100], "margin_eps": 0.02, "val_eval_target_models_all_datasets": false, "val_eval_fixed_backbones": false, "save_best_ic8x10_checkpoint": false, "test_eval_target_models_all_datasets": false, "config": "config/ablations/MLPMetric_NoModelID_unified_augmented.yaml", "is_distributed": true, "world_size": 4, "rank": 0, "local_rank": 0, "num_models": 47062, "num_tasks": 2551, "num_metrics": 8420, "unknown_metric_id": 0, "num_size_buckets": 23, "num_families": 331}
data/metric2id.json ADDED
@@ -0,0 +1,3174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "#_of_tokens": 0,
3
+ "#_params_m": 1,
4
+ "#_params_m_-_img": 2,
5
+ "#_params_m_-_txt": 3,
6
+ "#_params_m_img": 4,
7
+ "#_params_m_img+txt": 5,
8
+ "#_params_m_img_+_txt": 6,
9
+ "#_params_m_txt": 7,
10
+ "#_seen_samples_b": 8,
11
+ "#samples": 9,
12
+ "%_test_accuracy": 10,
13
+ "0-shot": 11,
14
+ "0-shot_accuracy": 12,
15
+ "0-shot_cot": 13,
16
+ "0-shot_rougel": 14,
17
+ "0l": 15,
18
+ "1-shot": 16,
19
+ "1-shot_top-1": 17,
20
+ "10-20%_mask_psnr": 18,
21
+ "10-shot": 19,
22
+ "10-shot_accuracy": 20,
23
+ "10-way_1~2-shot": 21,
24
+ "10-way_5~10-shot": 22,
25
+ "128k": 23,
26
+ "12k": 24,
27
+ "15k_accuracy": 25,
28
+ "15k_normalized": 26,
29
+ "16k": 27,
30
+ "1:1_accuracy": 28,
31
+ "1_image_2*2_stitching_exact_accuracy": 29,
32
+ "1px_total": 30,
33
+ "2-shot": 31,
34
+ "2-shot_cot": 32,
35
+ "25-shot": 33,
36
+ "2r._avg.": 34,
37
+ "3-5-shot": 35,
38
+ "3-fold_accuracy": 36,
39
+ "3-shot": 37,
40
+ "3-shot_cot": 38,
41
+ "3-shot_f1": 39,
42
+ "300_samples_greedy_decoding": 40,
43
+ "4-class_test_accuracy": 41,
44
+ "4-shot": 42,
45
+ "4-shot_cot": 43,
46
+ "40k_accuracy": 44,
47
+ "40k_normalized": 45,
48
+ "5-fold_cv_accuracy_mean": 46,
49
+ "5-fold_cv_f1_mean": 47,
50
+ "5-fold_cv_precision_mean": 48,
51
+ "5-fold_cv_recall_mean": 49,
52
+ "5-shot": 50,
53
+ "5-shot_accuracy": 51,
54
+ "5-shot_maj@1": 52,
55
+ "5-shot_top-1": 53,
56
+ "5-shot_top-1_accuracy": 54,
57
+ "5-way_5~10-shot": 55,
58
+ "50%_cytotoxicity_threshold_hits": 56,
59
+ "5_way_1~2_shot": 57,
60
+ "7-shot": 58,
61
+ "8-shot": 59,
62
+ "8-shot_cot": 60,
63
+ "a2": 61,
64
+ "abductive": 62,
65
+ "absolute_distance": 63,
66
+ "absolute_trajectory_error_m": 64,
67
+ "absrel": 65,
68
+ "abstention_f1": 66,
69
+ "acc-norm_0-shot": 67,
70
+ "acc.": 68,
71
+ "acc._norm": 69,
72
+ "acc_%": 70,
73
+ "acc_fluctuations": 71,
74
+ "acc_length_num_draft_tokens=4": 72,
75
+ "acc_length_num_draft_tokens=8": 73,
76
+ "acc_n": 74,
77
+ "acc_none_ceval-valid": 75,
78
+ "acc_none_cmmlu": 76,
79
+ "acc_none_meta_mmlu_5shot_pretrain": 77,
80
+ "accent_acc": 78,
81
+ "accuarcy": 79,
82
+ "accuracy": 80,
83
+ "accuracy-norm": 81,
84
+ "accuracy@1": 82,
85
+ "accuracy@10": 83,
86
+ "accuracy@100": 84,
87
+ "accuracy@3": 85,
88
+ "accuracy@5": 86,
89
+ "accuracy_%": 87,
90
+ "accuracy_'bezeichnung'": 88,
91
+ "accuracy_'thema'": 89,
92
+ "accuracy_-_clean_images": 90,
93
+ "accuracy_0-shot": 91,
94
+ "accuracy_0_shot": 92,
95
+ "accuracy_10-shot": 93,
96
+ "accuracy_20-vote": 94,
97
+ "accuracy_25-shot": 95,
98
+ "accuracy_5-shot": 96,
99
+ "accuracy_5_shot": 97,
100
+ "accuracy_@_iou_0.5": 98,
101
+ "accuracy_acc": 99,
102
+ "accuracy_all_extraction": 100,
103
+ "accuracy_cardiffnlp/tweet_sentiment_multilingual/all": 101,
104
+ "accuracy_cardiffnlp/tweet_topic_multi": 102,
105
+ "accuracy_cardiffnlp/tweet_topic_single": 103,
106
+ "accuracy_clean_extraction": 104,
107
+ "accuracy_cosinus": 105,
108
+ "accuracy_cross-setup": 106,
109
+ "accuracy_cs": 107,
110
+ "accuracy_easy": 108,
111
+ "accuracy_epoch=1": 109,
112
+ "accuracy_estimated": 110,
113
+ "accuracy_euclidean": 111,
114
+ "accuracy_hamming": 112,
115
+ "accuracy_high": 113,
116
+ "accuracy_llm-judge_1-3": 114,
117
+ "accuracy_manhattan": 115,
118
+ "accuracy_norm": 116,
119
+ "accuracy_on_closed_subset": 117,
120
+ "accuracy_private": 118,
121
+ "accuracy_quantized": 119,
122
+ "accuracy_queue": 120,
123
+ "accuracy_report": 121,
124
+ "accuracy_score": 122,
125
+ "accuracy_stderr": 123,
126
+ "accuracy_test": 124,
127
+ "accuracy_threshold": 125,
128
+ "accuracy_top-1": 126,
129
+ "accuracy_top-5": 127,
130
+ "accuracy_top2": 128,
131
+ "accuracy_tweet_eval/emoji": 129,
132
+ "accuracy_tweet_eval/emotion": 130,
133
+ "accuracy_tweet_eval/hate": 131,
134
+ "accuracy_tweet_eval/irony": 132,
135
+ "accuracy_tweet_eval/offensive": 133,
136
+ "accuracy_tweet_eval/sentiment": 134,
137
+ "accuracy_type": 135,
138
+ "accuracy_zero-shot": 136,
139
+ "accuray": 137,
140
+ "action@1": 138,
141
+ "action_repetition": 139,
142
+ "actionability": 140,
143
+ "active_dims": 141,
144
+ "acur\u00e1cia": 142,
145
+ "ade": 143,
146
+ "adjusted_rand_index": 144,
147
+ "aesthetics_laion_aesthtetics_predictor": 145,
148
+ "age": 146,
149
+ "age_acc": 147,
150
+ "age_mae_years": 148,
151
+ "aggregate_rmse_multi-head_\u2192_final": 149,
152
+ "aggregate_r\u00b2_multi-head_\u2192_final": 150,
153
+ "ai2_reasoning_challenge": 151,
154
+ "ai2_reasoning_challenge_25-shot": 152,
155
+ "aic": 153,
156
+ "aime": 154,
157
+ "aime24": 155,
158
+ "aime24-th": 156,
159
+ "aime25": 157,
160
+ "aime_2025": 158,
161
+ "aime_25": 159,
162
+ "aligned-relative_word_error_rate_arwer_%": 160,
163
+ "alignscore": 161,
164
+ "all": 162,
165
+ "all_levels": 163,
166
+ "all_samples_greedy_decoding": 164,
167
+ "alpacaeval": 165,
168
+ "alpacaeval_win_rate_%": 166,
169
+ "ami_xiug_->_zho_hant_zh": 167,
170
+ "amota": 168,
171
+ "anls": 169,
172
+ "ap": 170,
173
+ "ap50": 171,
174
+ "ap75": 172,
175
+ "ap@_.5": 173,
176
+ "ap@_.5_.95": 174,
177
+ "ap@_.75": 175,
178
+ "ap@iou=0.50": 176,
179
+ "ap@iou=0.75": 177,
180
+ "ap_@_iou=0.50:0.95_|_area=all_|_maxdets=100": 178,
181
+ "ap_@_iou=0.50:0.95_|_area=large_|_maxdets=100": 179,
182
+ "ap_@_iou=0.50:0.95_|_area=medium_|_maxdets=100": 180,
183
+ "ap_@_iou=0.50:0.95_|_area=small_|_maxdets=100": 181,
184
+ "ap_@_iou=0.50_|_area=all_|_maxdets=100": 182,
185
+ "ap_@_iou=0.75_|_area=all_|_maxdets=100": 183,
186
+ "ap_easy": 184,
187
+ "ap_iou=0.50:0.95": 185,
188
+ "ap_novel-lvis_base_training": 186,
189
+ "ap_stderr": 187,
190
+ "ap_weighted": 188,
191
+ "aph/l2": 189,
192
+ "api": 190,
193
+ "apl_large_objects": 191,
194
+ "apm_medium_objects": 192,
195
+ "appearance_order": 193,
196
+ "approximate_accuracy": 194,
197
+ "aps_small_objects": 195,
198
+ "ap|r40_easy": 196,
199
+ "ar-large": 197,
200
+ "ar@0.50": 198,
201
+ "ar@0.75": 199,
202
+ "ar@_iou=0.50:0.95_|_maxdets=100": 200,
203
+ "ar_@_iou=0.50:0.95_|_area=all_|_maxdets=1": 201,
204
+ "ar_@_iou=0.50:0.95_|_area=all_|_maxdets=10": 202,
205
+ "ar_@_iou=0.50:0.95_|_area=all_|_maxdets=100": 203,
206
+ "ar_@_iou=0.50:0.95_|_area=large_|_maxdets=100": 204,
207
+ "ar_@_iou=0.50:0.95_|_area=medium_|_maxdets=100": 205,
208
+ "ar_@_iou=0.50:0.95_|_area=small_|_maxdets=100": 206,
209
+ "ar_ch": 207,
210
+ "arc": 208,
211
+ "arc_25-shot": 209,
212
+ "arc_challenge": 210,
213
+ "arc_challenge_0-shot": 211,
214
+ "arc_challenge_de_0-shot": 212,
215
+ "arc_challenge_de_5-shot": 213,
216
+ "arc_easy": 214,
217
+ "arc_mc": 215,
218
+ "arc_task_solve_rate_pass@1": 216,
219
+ "arc_task_solve_rate_pass@10": 217,
220
+ "arc_task_solve_rate_pass@100": 218,
221
+ "arc_task_solve_rate_pass@2": 219,
222
+ "area-under-the-receiver-operating-characteristic": 220,
223
+ "ari": 221,
224
+ "ari-fg": 222,
225
+ "arousal-valence_mse": 223,
226
+ "article_generation_success_rate": 224,
227
+ "artificial_analysis_coding_index": 225,
228
+ "artificial_analysis_intelligence_index": 226,
229
+ "artificial_analysis_math_index": 227,
230
+ "asr-bleu": 228,
231
+ "assa": 229,
232
+ "auc": 230,
233
+ "auc-roc": 231,
234
+ "auc_covid-19": 232,
235
+ "auc_healthy": 233,
236
+ "auc_symptomatic": 234,
237
+ "audio-to-text_r@1": 235,
238
+ "audio-to-text_r@10": 236,
239
+ "audio-to-text_r@5": 237,
240
+ "audio_quality": 238,
241
+ "audio_quality_mos": 239,
242
+ "auprc": 240,
243
+ "auroc": 241,
244
+ "auroc_1-shot": 242,
245
+ "available_dists.": 243,
246
+ "average": 244,
247
+ "average-map": 245,
248
+ "average_accuracy": 246,
249
+ "average_accuracy_improvement": 247,
250
+ "average_accuracy_of_3_splits": 248,
251
+ "average_auc-roc": 249,
252
+ "average_auc_on_14_label": 250,
253
+ "average_bleu": 251,
254
+ "average_confidence": 252,
255
+ "average_decisions": 253,
256
+ "average_end-point_error": 254,
257
+ "average_exact_match": 255,
258
+ "average_f1": 256,
259
+ "average_f1-score": 257,
260
+ "average_hallucinations": 258,
261
+ "average_improvement_vs_base": 259,
262
+ "average_incremental_accuracy": 260,
263
+ "average_individual_accuracy": 261,
264
+ "average_individual_loss": 262,
265
+ "average_iou": 263,
266
+ "average_jaccard": 264,
267
+ "average_latency_ms": 265,
268
+ "average_macro-f1": 266,
269
+ "average_map": 267,
270
+ "average_media_wer_processed": 268,
271
+ "average_mpjpe_mm": 269,
272
+ "average_pearson": 270,
273
+ "average_pixel_f1_fixed_threshold": 271,
274
+ "average_precision": 272,
275
+ "average_precision_macro": 273,
276
+ "average_precision_micro": 274,
277
+ "average_psnr_db": 275,
278
+ "average_quality_score": 276,
279
+ "average_recall@iou:0.5-0.95": 277,
280
+ "average_response_time_seconds": 278,
281
+ "average_reward_live": 279,
282
+ "average_reward_score": 280,
283
+ "average_reward_stress": 281,
284
+ "average_roc_auc": 282,
285
+ "average_rtfx": 283,
286
+ "average_score": 284,
287
+ "average_score_on_11_academic_benchmarks": 285,
288
+ "average_score_on_15_academic_benchmarks": 286,
289
+ "average_score_on_vlm2-bench_9_subtasks": 287,
290
+ "average_scores_5-shot": 288,
291
+ "average_spearman": 289,
292
+ "average_top-1_accuracy": 290,
293
+ "average_top-1_classification_accuracy": 291,
294
+ "average_win_$": 292,
295
+ "averageaccuracy": 293,
296
+ "averaged_accuracy": 294,
297
+ "averagepass@1": 295,
298
+ "avg": 296,
299
+ "avg.": 297,
300
+ "avg._bleu": 298,
301
+ "avg._perf._%_on_38_datasets": 299,
302
+ "avg._score_by_gpt-4o": 300,
303
+ "avg._sequence_length": 301,
304
+ "avg._sequence_length_d_to_d": 302,
305
+ "avg._test_bertscore": 303,
306
+ "avg@10": 304,
307
+ "avg@16": 305,
308
+ "avg@32": 306,
309
+ "avg@4": 307,
310
+ "avg_acc": 308,
311
+ "avg_acc_french_on_development_set": 309,
312
+ "avg_acc_german_on_development_set": 310,
313
+ "avg_acc_japanese_on_development_set": 311,
314
+ "avg_dsc": 312,
315
+ "avg_f1": 313,
316
+ "avg_flops": 314,
317
+ "avg_latency": 315,
318
+ "avg_map_0.3:0.7": 316,
319
+ "avg_positive_predictions": 317,
320
+ "avg_prompt/instruction_acc_loose/strict": 318,
321
+ "avg_prompt_strict_+_inst_strict": 319,
322
+ "avg_reward": 320,
323
+ "avg_target_words": 321,
324
+ "avg_wer": 322,
325
+ "avg_words_per_sec": 323,
326
+ "b1": 324,
327
+ "background_specificity": 325,
328
+ "balanced_accuracy": 326,
329
+ "bartscore": 327,
330
+ "base_score": 328,
331
+ "baseline_bleu": 329,
332
+ "baseline_chrf": 330,
333
+ "basic_skills": 331,
334
+ "batch_size": 332,
335
+ "bbh": 333,
336
+ "bem": 334,
337
+ "benchmark_score": 335,
338
+ "bert": 336,
339
+ "bert_score": 337,
340
+ "bertscore": 338,
341
+ "bertscore-f1": 339,
342
+ "bertscore_f1": 340,
343
+ "bertscore_mean_f1": 341,
344
+ "bertscore_mean_precision": 342,
345
+ "bertscore_mean_recall": 343,
346
+ "bertscore_precision": 344,
347
+ "bertscore_recall": 345,
348
+ "bertscore_xlm-r-large": 346,
349
+ "best-of": 347,
350
+ "best_accuracy_128_dim": 348,
351
+ "best_eval_loss": 349,
352
+ "best_eval_reward": 350,
353
+ "best_evaluation_reward": 351,
354
+ "best_exact": 352,
355
+ "best_exact_thresh": 353,
356
+ "best_f1": 354,
357
+ "best_f1_256_dim": 355,
358
+ "best_f1_thresh": 356,
359
+ "best_individual_accuracy": 357,
360
+ "best_max_drawdown_tsla": 358,
361
+ "best_sharpe_ratio_amzn": 359,
362
+ "best_total_return_amzn": 360,
363
+ "best_wer": 361,
364
+ "best_win_rate_msft": 362,
365
+ "bigcodebench": 363,
366
+ "binary_accuracy": 364,
367
+ "binary_cosine_accuracy@1": 365,
368
+ "binary_cosine_accuracy@10": 366,
369
+ "binary_cosine_accuracy@3": 367,
370
+ "binary_cosine_accuracy@5": 368,
371
+ "binary_cosine_map@100": 369,
372
+ "binary_cosine_mrr@10": 370,
373
+ "binary_cosine_ndcg@10": 371,
374
+ "binary_cosine_precision@1": 372,
375
+ "binary_cosine_precision@10": 373,
376
+ "binary_cosine_precision@3": 374,
377
+ "binary_cosine_precision@5": 375,
378
+ "binary_cosine_recall@1": 376,
379
+ "binary_cosine_recall@10": 377,
380
+ "binary_cosine_recall@3": 378,
381
+ "binary_cosine_recall@5": 379,
382
+ "biology": 380,
383
+ "bit_per_character_bpc": 381,
384
+ "bits_per_byte": 382,
385
+ "bits_per_weight_4-bit": 383,
386
+ "bits_per_weight_8-bit": 384,
387
+ "blanc": 385,
388
+ "bleu": 386,
389
+ "bleu-1": 387,
390
+ "bleu-2": 388,
391
+ "bleu-4": 389,
392
+ "bleu-4_score": 390,
393
+ "bleu@1": 391,
394
+ "bleu@2": 392,
395
+ "bleu@3": 393,
396
+ "bleu@4": 394,
397
+ "bleu_acc": 395,
398
+ "bleu_diff": 396,
399
+ "bleu_improvement": 397,
400
+ "bleu_improvement_percent": 398,
401
+ "bleu_max": 399,
402
+ "bleu_on_common_voice_17.0": 400,
403
+ "bleu_score": 401,
404
+ "bleu_xx\u2192eng": 402,
405
+ "bleurt": 403,
406
+ "bleurt_acc": 404,
407
+ "bleurt_diff": 405,
408
+ "bleurt_max": 406,
409
+ "bleurt_mean": 407,
410
+ "block-fid": 408,
411
+ "block-fid_right_extend": 409,
412
+ "block_size": 410,
413
+ "boolq": 411,
414
+ "box_ap": 412,
415
+ "box_map": 413,
416
+ "bsq-rate_over_erqa": 414,
417
+ "byte_perplexity": 415,
418
+ "ca": 416,
419
+ "cap._avg._r@1": 417,
420
+ "case-sensitive_sacrebleu": 418,
421
+ "casehold": 419,
422
+ "categorization_ablation": 420,
423
+ "category_clustering_main": 421,
424
+ "category_miou": 422,
425
+ "ccc": 423,
426
+ "cd": 424,
427
+ "cda": 425,
428
+ "cer": 426,
429
+ "cer-char": 427,
430
+ "cer-rome": 428,
431
+ "cer_%": 429,
432
+ "cer_catalan": 430,
433
+ "cer_character_error_rate": 431,
434
+ "cer_documentaries": 432,
435
+ "cer_lm": 433,
436
+ "cer_normalized": 434,
437
+ "cer_on_common_voice_17.0": 435,
438
+ "cer_raw": 436,
439
+ "cer_spanish": 437,
440
+ "cer_test": 438,
441
+ "cer_validation": 439,
442
+ "cfg_scale": 440,
443
+ "chair_i": 441,
444
+ "character-level_accuracy": 442,
445
+ "character_accuracy": 443,
446
+ "character_error_rate": 444,
447
+ "character_error_rate_cer": 445,
448
+ "character_persistence_\u22655_frames": 446,
449
+ "character_precision": 447,
450
+ "character_recall": 448,
451
+ "china_specific": 449,
452
+ "chord_match": 450,
453
+ "chr-f": 451,
454
+ "chrf": 452,
455
+ "chrf++": 453,
456
+ "chrf2": 454,
457
+ "chrf_eng\u2192xx": 455,
458
+ "chrf_improvement": 456,
459
+ "chrf_improvement_percent": 457,
460
+ "chrf_on_common_voice_17.0": 458,
461
+ "chrf_score": 459,
462
+ "chrf_xx\u2192eng": 460,
463
+ "cider": 461,
464
+ "cider-d": 462,
465
+ "citation_classification": 463,
466
+ "classification_accuracy": 464,
467
+ "classifier_dropout": 465,
468
+ "click_accuracy": 466,
469
+ "clip": 467,
470
+ "clip-s": 468,
471
+ "clip_r-precision": 469,
472
+ "clip_score": 470,
473
+ "clipscore": 471,
474
+ "clipsim": 472,
475
+ "clustering_accuracy": 473,
476
+ "clustering_miou": 474,
477
+ "coco-style_ap": 475,
478
+ "code_accuracy": 476,
479
+ "codebleu": 477,
480
+ "coding": 478,
481
+ "cohen_kappa": 479,
482
+ "coherence": 480,
483
+ "coherence_%": 481,
484
+ "comb": 482,
485
+ "combined_score": 483,
486
+ "comet": 484,
487
+ "comet_baseline": 485,
488
+ "comet_score": 486,
489
+ "cometh_human-only": 487,
490
+ "common_voice_irish_invalidated_281_utterances_with_lm": 488,
491
+ "common_voice_irish_invalidated_281_utterances_without_lm": 489,
492
+ "common_words_accuracy_%": 490,
493
+ "competition_rank": 491,
494
+ "competition_similarity_score": 492,
495
+ "completed_training_rounds": 493,
496
+ "compliance_rate": 494,
497
+ "compound_words_accuracy_%": 495,
498
+ "compression_ratio": 496,
499
+ "concept_preservation_cp": 497,
500
+ "concordance_correlation_coefficient_ccc": 498,
501
+ "cond": 499,
502
+ "confidence_calibration": 500,
503
+ "confidence_score": 501,
504
+ "confusion_matrix": 502,
505
+ "conn": 503,
506
+ "conn.": 504,
507
+ "consistency": 505,
508
+ "context": 506,
509
+ "coqa": 507,
510
+ "coqa_gen2mc_mc": 508,
511
+ "core_score": 509,
512
+ "corloc": 510,
513
+ "corpus_active_dims": 511,
514
+ "corpus_sparsity_ratio": 512,
515
+ "correctness": 513,
516
+ "correctness_avg._%": 514,
517
+ "corrsc": 515,
518
+ "cos_sim-map@100": 516,
519
+ "cos_sim-mrr@10": 517,
520
+ "cos_sim-ndcg@10": 518,
521
+ "cos_sim-recall@5": 519,
522
+ "cos_sim_accuracy": 520,
523
+ "cos_sim_accuracy@1": 521,
524
+ "cos_sim_accuracy@10": 522,
525
+ "cos_sim_accuracy@3": 523,
526
+ "cos_sim_accuracy@5": 524,
527
+ "cos_sim_accuracy_threshold": 525,
528
+ "cos_sim_ap": 526,
529
+ "cos_sim_f1": 527,
530
+ "cos_sim_f1_threshold": 528,
531
+ "cos_sim_map@100": 529,
532
+ "cos_sim_mrr@10": 530,
533
+ "cos_sim_ndcg@10": 531,
534
+ "cos_sim_pearson": 532,
535
+ "cos_sim_precision": 533,
536
+ "cos_sim_precision@1": 534,
537
+ "cos_sim_precision@10": 535,
538
+ "cos_sim_precision@3": 536,
539
+ "cos_sim_precision@5": 537,
540
+ "cos_sim_recall": 538,
541
+ "cos_sim_recall@1": 539,
542
+ "cos_sim_recall@10": 540,
543
+ "cos_sim_recall@3": 541,
544
+ "cos_sim_recall@5": 542,
545
+ "cos_sim_spearman": 543,
546
+ "cosine_accuracy": 544,
547
+ "cosine_accuracy@1": 545,
548
+ "cosine_accuracy@10": 546,
549
+ "cosine_accuracy@100": 547,
550
+ "cosine_accuracy@1000": 548,
551
+ "cosine_accuracy@12": 549,
552
+ "cosine_accuracy@15": 550,
553
+ "cosine_accuracy@150": 551,
554
+ "cosine_accuracy@2": 552,
555
+ "cosine_accuracy@20": 553,
556
+ "cosine_accuracy@200": 554,
557
+ "cosine_accuracy@25": 555,
558
+ "cosine_accuracy@3": 556,
559
+ "cosine_accuracy@30": 557,
560
+ "cosine_accuracy@300": 558,
561
+ "cosine_accuracy@5": 559,
562
+ "cosine_accuracy@50": 560,
563
+ "cosine_accuracy@500": 561,
564
+ "cosine_accuracy@7": 562,
565
+ "cosine_accuracy_evaluation": 563,
566
+ "cosine_accuracy_on_dev": 564,
567
+ "cosine_accuracy_on_test": 565,
568
+ "cosine_accuracy_threshold": 566,
569
+ "cosine_ap": 567,
570
+ "cosine_auc_precision_cache_hit_ratio": 568,
571
+ "cosine_auc_similarity_distribution": 569,
572
+ "cosine_f1": 570,
573
+ "cosine_f1_threshold": 571,
574
+ "cosine_map@1": 572,
575
+ "cosine_map@10": 573,
576
+ "cosine_map@100": 574,
577
+ "cosine_map@1000": 575,
578
+ "cosine_map@12": 576,
579
+ "cosine_map@150": 577,
580
+ "cosine_map@20": 578,
581
+ "cosine_map@200": 579,
582
+ "cosine_map@25": 580,
583
+ "cosine_map@3": 581,
584
+ "cosine_map@300": 582,
585
+ "cosine_map@5": 583,
586
+ "cosine_map@50": 584,
587
+ "cosine_map@500": 585,
588
+ "cosine_mcc": 586,
589
+ "cosine_mrr@1": 587,
590
+ "cosine_mrr@10": 588,
591
+ "cosine_mrr@100": 589,
592
+ "cosine_mrr@1000": 590,
593
+ "cosine_mrr@150": 591,
594
+ "cosine_mrr@2": 592,
595
+ "cosine_mrr@20": 593,
596
+ "cosine_mrr@200": 594,
597
+ "cosine_mrr@25": 595,
598
+ "cosine_mrr@3": 596,
599
+ "cosine_mrr@30": 597,
600
+ "cosine_mrr@300": 598,
601
+ "cosine_mrr@5": 599,
602
+ "cosine_mrr@50": 600,
603
+ "cosine_mrr@500": 601,
604
+ "cosine_ndcg@1": 602,
605
+ "cosine_ndcg@10": 603,
606
+ "cosine_ndcg@100": 604,
607
+ "cosine_ndcg@1000": 605,
608
+ "cosine_ndcg@15": 606,
609
+ "cosine_ndcg@150": 607,
610
+ "cosine_ndcg@20": 608,
611
+ "cosine_ndcg@200": 609,
612
+ "cosine_ndcg@25": 610,
613
+ "cosine_ndcg@3": 611,
614
+ "cosine_ndcg@30": 612,
615
+ "cosine_ndcg@300": 613,
616
+ "cosine_ndcg@5": 614,
617
+ "cosine_ndcg@50": 615,
618
+ "cosine_ndcg@500": 616,
619
+ "cosine_ndcg@7": 617,
620
+ "cosine_pearson": 618,
621
+ "cosine_precision": 619,
622
+ "cosine_precision@1": 620,
623
+ "cosine_precision@10": 621,
624
+ "cosine_precision@100": 622,
625
+ "cosine_precision@1000": 623,
626
+ "cosine_precision@12": 624,
627
+ "cosine_precision@15": 625,
628
+ "cosine_precision@150": 626,
629
+ "cosine_precision@2": 627,
630
+ "cosine_precision@20": 628,
631
+ "cosine_precision@200": 629,
632
+ "cosine_precision@25": 630,
633
+ "cosine_precision@3": 631,
634
+ "cosine_precision@30": 632,
635
+ "cosine_precision@300": 633,
636
+ "cosine_precision@5": 634,
637
+ "cosine_precision@50": 635,
638
+ "cosine_precision@500": 636,
639
+ "cosine_precision@7": 637,
640
+ "cosine_recall": 638,
641
+ "cosine_recall@1": 639,
642
+ "cosine_recall@10": 640,
643
+ "cosine_recall@100": 641,
644
+ "cosine_recall@1000": 642,
645
+ "cosine_recall@12": 643,
646
+ "cosine_recall@15": 644,
647
+ "cosine_recall@150": 645,
648
+ "cosine_recall@2": 646,
649
+ "cosine_recall@20": 647,
650
+ "cosine_recall@200": 648,
651
+ "cosine_recall@25": 649,
652
+ "cosine_recall@3": 650,
653
+ "cosine_recall@30": 651,
654
+ "cosine_recall@300": 652,
655
+ "cosine_recall@5": 653,
656
+ "cosine_recall@50": 654,
657
+ "cosine_recall@500": 655,
658
+ "cosine_recall@7": 656,
659
+ "cosine_similarity": 657,
660
+ "cosine_similarity_score": 658,
661
+ "cosine_spearman": 659,
662
+ "cot": 660,
663
+ "cot_acc": 661,
664
+ "cot_em": 662,
665
+ "count": 663,
666
+ "coverage": 664,
667
+ "coverage_$": 665,
668
+ "coverage_$.": 666,
669
+ "coverage_adja": 667,
670
+ "coverage_adjd": 668,
671
+ "coverage_adv": 669,
672
+ "coverage_appo": 670,
673
+ "coverage_appr": 671,
674
+ "coverage_apprart": 672,
675
+ "coverage_apzr": 673,
676
+ "coverage_art": 674,
677
+ "coverage_card": 675,
678
+ "coverage_fm": 676,
679
+ "coverage_itj": 677,
680
+ "coverage_kokom": 678,
681
+ "coverage_kon": 679,
682
+ "coverage_koui": 680,
683
+ "coverage_kous": 681,
684
+ "coverage_ne": 682,
685
+ "coverage_nn": 683,
686
+ "coverage_pdat": 684,
687
+ "coverage_pds": 685,
688
+ "coverage_piat": 686,
689
+ "coverage_pidat": 687,
690
+ "coverage_pis": 688,
691
+ "coverage_pper": 689,
692
+ "coverage_pposat": 690,
693
+ "coverage_pposs": 691,
694
+ "coverage_prelat": 692,
695
+ "coverage_prels": 693,
696
+ "coverage_prf": 694,
697
+ "coverage_proav": 695,
698
+ "coverage_ptka": 696,
699
+ "coverage_ptkant": 697,
700
+ "coverage_ptkneg": 698,
701
+ "coverage_ptkvz": 699,
702
+ "coverage_ptkzu": 700,
703
+ "coverage_pwat": 701,
704
+ "coverage_pwav": 702,
705
+ "coverage_pws": 703,
706
+ "coverage_vafin": 704,
707
+ "coverage_vaimp": 705,
708
+ "coverage_vainf": 706,
709
+ "coverage_vapp": 707,
710
+ "coverage_vmfin": 708,
711
+ "coverage_vminf": 709,
712
+ "coverage_vmpp": 710,
713
+ "coverage_vvfin": 711,
714
+ "coverage_vvimp": 712,
715
+ "coverage_vvinf": 713,
716
+ "coverage_vvizu": 714,
717
+ "coverage_vvpp": 715,
718
+ "coverage_xy": 716,
719
+ "covid-19_accuracy": 717,
720
+ "cross-context_retrieval": 718,
721
+ "cross-validation_roc-auc": 719,
722
+ "cross_entropy_loss": 720,
723
+ "csqa_mc": 721,
724
+ "cumulative": 722,
725
+ "cumulative_reward": 723,
726
+ "current_eval_reward": 724,
727
+ "d1-all": 725,
728
+ "d_bert_:_f1": 726,
729
+ "da_vqa_score": 727,
730
+ "dapo_accuracy": 728,
731
+ "dataset_size": 729,
732
+ "dataset_size_gb": 730,
733
+ "decode_latency_ms": 731,
734
+ "deepmind_math": 732,
735
+ "deepseek_leetcode": 733,
736
+ "deepslot_f1": 734,
737
+ "delta": 735,
738
+ "delta_%": 736,
739
+ "dense_acc": 737,
740
+ "der_%": 738,
741
+ "description_accuracy": 739,
742
+ "detection_auroc": 740,
743
+ "detection_auroc_severity_0": 741,
744
+ "detection_rate": 742,
745
+ "deterministic_format_accuracy_\"exactamente_n\"": 743,
746
+ "dev16_cer": 744,
747
+ "dev16_wer": 745,
748
+ "dev_accuracy": 746,
749
+ "dev_cer": 747,
750
+ "dev_cer_+lm": 748,
751
+ "dev_cer_with_lm": 749,
752
+ "dev_cer_without_lm": 750,
753
+ "dev_macro_f1_score": 751,
754
+ "dev_macro_precision": 752,
755
+ "dev_macro_recall": 753,
756
+ "dev_wer": 754,
757
+ "dev_wer_+lm": 755,
758
+ "dev_wer_with_lm": 756,
759
+ "dev_wer_without_lm": 757,
760
+ "devops_relevance_score_0-10": 758,
761
+ "diagnostic_coherence_score": 759,
762
+ "dice": 760,
763
+ "dice-score": 761,
764
+ "dice_average": 762,
765
+ "dice_coefficient": 763,
766
+ "dice_score": 764,
767
+ "dim": 765,
768
+ "direct": 766,
769
+ "direction_accuracy_avg": 767,
770
+ "direction_accuracy_best": 768,
771
+ "distractor_accuracy": 769,
772
+ "diversity": 770,
773
+ "dnsmos_bak": 771,
774
+ "dnsmos_ovrl": 772,
775
+ "dnsmos_sig": 773,
776
+ "dot_accuracy": 774,
777
+ "dot_accuracy@1": 775,
778
+ "dot_accuracy@10": 776,
779
+ "dot_accuracy@100": 777,
780
+ "dot_accuracy@2": 778,
781
+ "dot_accuracy@3": 779,
782
+ "dot_accuracy@30": 780,
783
+ "dot_accuracy@5": 781,
784
+ "dot_accuracy@50": 782,
785
+ "dot_accuracy@8": 783,
786
+ "dot_accuracy_10": 784,
787
+ "dot_accuracy_threshold": 785,
788
+ "dot_ap": 786,
789
+ "dot_f1": 787,
790
+ "dot_f1_threshold": 788,
791
+ "dot_map@10": 789,
792
+ "dot_map@100": 790,
793
+ "dot_map@60": 791,
794
+ "dot_map_60": 792,
795
+ "dot_mcc": 793,
796
+ "dot_mrr@1": 794,
797
+ "dot_mrr@10": 795,
798
+ "dot_mrr@100": 796,
799
+ "dot_mrr@2": 797,
800
+ "dot_mrr@200": 798,
801
+ "dot_mrr@5": 799,
802
+ "dot_mrr_10": 800,
803
+ "dot_ndcg@1": 801,
804
+ "dot_ndcg@10": 802,
805
+ "dot_ndcg@100": 803,
806
+ "dot_ndcg@5": 804,
807
+ "dot_ndcg_10": 805,
808
+ "dot_pearson": 806,
809
+ "dot_precision": 807,
810
+ "dot_precision@1": 808,
811
+ "dot_precision@10": 809,
812
+ "dot_precision@100": 810,
813
+ "dot_precision@2": 811,
814
+ "dot_precision@3": 812,
815
+ "dot_precision@30": 813,
816
+ "dot_precision@5": 814,
817
+ "dot_precision@50": 815,
818
+ "dot_precision@8": 816,
819
+ "dot_precision_10": 817,
820
+ "dot_recall": 818,
821
+ "dot_recall@1": 819,
822
+ "dot_recall@10": 820,
823
+ "dot_recall@100": 821,
824
+ "dot_recall@2": 822,
825
+ "dot_recall@3": 823,
826
+ "dot_recall@30": 824,
827
+ "dot_recall@5": 825,
828
+ "dot_recall@50": 826,
829
+ "dot_recall@8": 827,
830
+ "dot_recall_10": 828,
831
+ "dot_score-map@100": 829,
832
+ "dot_score-mrr@10": 830,
833
+ "dot_score-ndcg@10": 831,
834
+ "dot_score-recall@5": 832,
835
+ "dot_score_accuracy@10": 833,
836
+ "dot_score_map@10": 834,
837
+ "dot_score_mrr@10": 835,
838
+ "dot_score_ndcg@10": 836,
839
+ "dot_score_precision@10": 837,
840
+ "dot_score_recall@10": 838,
841
+ "dot_sim_accuracy": 839,
842
+ "dot_sim_ap": 840,
843
+ "dot_spearman": 841,
844
+ "drilling_calculations_accuracy": 842,
845
+ "drop": 843,
846
+ "drop_3-shot": 844,
847
+ "drop_gen2mc_mc": 845,
848
+ "dropout": 846,
849
+ "ds_1000": 847,
850
+ "dsc": 848,
851
+ "dynamics_model_mse_loss": 849,
852
+ "e/i_accuracy": 850,
853
+ "eao": 851,
854
+ "ecthr_a": 852,
855
+ "edit-smiliarity": 853,
856
+ "eer": 854,
857
+ "eer_%": 855,
858
+ "element_iou": 856,
859
+ "elo": 857,
860
+ "elo_rating": 858,
861
+ "em@5_baseline": 859,
862
+ "em@5_with_instructions": 860,
863
+ "em_3-shot": 861,
864
+ "em_line-level": 862,
865
+ "em_maj1@1": 863,
866
+ "em_\u2264_8k": 864,
867
+ "embedding_dimension": 865,
868
+ "embedding_dropout": 866,
869
+ "emergence_detection_f1": 867,
870
+ "emergence_detection_rate": 868,
871
+ "emotion_top-3_accuracy": 869,
872
+ "emotionclassification": 870,
873
+ "empos": 871,
874
+ "emr": 872,
875
+ "en_content_to_title_acc": 873,
876
+ "en_title_to_content_acc": 874,
877
+ "engineering_document_retrieval_precision": 875,
878
+ "english_to_chinese": 876,
879
+ "english_to_sanskrit_translation_-_bleu_score": 877,
880
+ "english_to_sanskrit_translation_-_jaccard_similarity": 878,
881
+ "entity_span_f1_test_2020": 879,
882
+ "entity_span_f1_test_2021": 880,
883
+ "entity_span_precision_test_2020": 881,
884
+ "entity_span_recall_test_2020": 882,
885
+ "entity_span_recall_test_2021": 883,
886
+ "entropy": 884,
887
+ "entropy_novelty": 885,
888
+ "ents_f": 886,
889
+ "ents_p": 887,
890
+ "ents_r": 888,
891
+ "epe": 889,
892
+ "epoch": 890,
893
+ "eq-bench_0-shot": 891,
894
+ "eq-bench_score": 892,
895
+ "eqbench": 893,
896
+ "erqav2.0": 894,
897
+ "error": 895,
898
+ "error_rate": 896,
899
+ "error_ratio": 897,
900
+ "euclidean_accuracy": 898,
901
+ "euclidean_accuracy_threshold": 899,
902
+ "euclidean_ap": 900,
903
+ "euclidean_f1": 901,
904
+ "euclidean_f1_threshold": 902,
905
+ "euclidean_mcc": 903,
906
+ "euclidean_pearson": 904,
907
+ "euclidean_precision": 905,
908
+ "euclidean_recall": 906,
909
+ "euclidean_spearman": 907,
910
+ "eud_jaccard": 908,
911
+ "eval_accuracy": 909,
912
+ "eval_bertscore_f1": 910,
913
+ "eval_bleu": 911,
914
+ "eval_cer": 912,
915
+ "eval_chrf": 913,
916
+ "eval_em": 914,
917
+ "eval_exact": 915,
918
+ "eval_exactmatch_score_squad_metric": 916,
919
+ "eval_f1": 917,
920
+ "eval_f1_score_squad_metric": 918,
921
+ "eval_hasans_exact": 919,
922
+ "eval_hasans_f1": 920,
923
+ "eval_loss": 921,
924
+ "eval_loss_best": 922,
925
+ "eval_noans_exact": 923,
926
+ "eval_noans_f1": 924,
927
+ "eval_perplexity": 925,
928
+ "eval_precision": 926,
929
+ "eval_recall": 927,
930
+ "eval_runtime": 928,
931
+ "eval_samples_per_second": 929,
932
+ "eval_steps_per_second": 930,
933
+ "eval_time": 931,
934
+ "eval_wer": 932,
935
+ "evaluation_accuracy": 933,
936
+ "evaluation_loss": 934,
937
+ "evaluation_macro_f1": 935,
938
+ "evaluation_macro_precision": 936,
939
+ "evaluation_macro_recall": 937,
940
+ "evaluation_micro_f1": 938,
941
+ "evaluation_micro_precision": 939,
942
+ "evaluation_micro_recall": 940,
943
+ "evaluation_runtime_seconds": 941,
944
+ "evaluation_samples_per_second": 942,
945
+ "evaluation_steps_per_second": 943,
946
+ "evaluation_weighted_f1": 944,
947
+ "evaluation_weighted_precision": 945,
948
+ "evaluation_weighted_recall": 946,
949
+ "exact": 947,
950
+ "exact-match": 948,
951
+ "exact_macth": 949,
952
+ "exact_match": 950,
953
+ "exact_match@16k": 951,
954
+ "exact_match@32k": 952,
955
+ "exact_match@4k": 953,
956
+ "exact_match@8k": 954,
957
+ "exact_match_%": 955,
958
+ "exact_match_accuracy": 956,
959
+ "exact_match_accuracy_dev": 957,
960
+ "exact_match_accuracy_in_dev": 958,
961
+ "exact_match_em": 959,
962
+ "exact_match_flexible": 960,
963
+ "exact_match_flexible-extract": 961,
964
+ "exact_match_none": 962,
965
+ "exact_match_none_gsm8k_0shot_instruct": 963,
966
+ "exact_match_none_meta_math_0shot_instruct": 964,
967
+ "exact_match_none_meta_math_hard_0shot_instruct": 965,
968
+ "exact_match_strict": 966,
969
+ "exact_match_strict-match": 967,
970
+ "exact_match_strict-match_ceval-valid-pretrain-cot_zh": 968,
971
+ "exact_match_strict-match_cmmlu_pretrain_cot_zh": 969,
972
+ "exact_match_strict-match_meta_arc_0shot_instruct": 970,
973
+ "exact_match_strict-match_meta_bbh_3shot_cot_pretrain": 971,
974
+ "exact_match_strict-match_meta_gpqa_0shot_cot_instruct": 972,
975
+ "exact_match_strict-match_meta_mmlu_0shot_instruct": 973,
976
+ "exact_match_strict-match_meta_mmlu_pro_5shot_instruct": 974,
977
+ "exact_match_strict-match_meta_mmlu_pro_5shot_pretrain": 975,
978
+ "exact_match_strict-match_meta_pretrain": 976,
979
+ "exact_match_strict-match_original_capability_instruct": 977,
980
+ "exact_match_strict-match_zh_pretrain_multishot": 978,
981
+ "exact_span_f1": 979,
982
+ "exact_string_match": 980,
983
+ "example-level_f1": 981,
984
+ "example_f1": 982,
985
+ "execution_accuracy": 983,
986
+ "execution_accuracy_%_dev": 984,
987
+ "expected_average_overlap_eao": 985,
988
+ "expert_dim": 986,
989
+ "expert_effectiveness_score": 987,
990
+ "expert_rating": 988,
991
+ "expguardtest_total_f1": 989,
992
+ "extact_match": 990,
993
+ "extraction": 991,
994
+ "f-measure": 992,
995
+ "f-measure_mean": 993,
996
+ "f-measure_seen": 994,
997
+ "f-measure_unseen": 995,
998
+ "f-score": 996,
999
+ "f0.5": 997,
1000
+ "f1": 998,
1001
+ "f1-macro": 999,
1002
+ "f1-score": 1000,
1003
+ "f1-score_%": 1001,
1004
+ "f1-score_dice_coefficient": 1002,
1005
+ "f1-score_macro": 1003,
1006
+ "f1-score_weighted": 1004,
1007
+ "f1-weighted": 1005,
1008
+ "f1@10": 1006,
1009
+ "f1@5": 1007,
1010
+ "f1@m": 1008,
1011
+ "f1_%": 1009,
1012
+ "f1_'bezeichnung'_macro": 1010,
1013
+ "f1_'thema'_macro": 1011,
1014
+ "f1_10-fold": 1012,
1015
+ "f1_20-vote": 1013,
1016
+ "f1_3-shot": 1014,
1017
+ "f1_admiration": 1015,
1018
+ "f1_af": 1016,
1019
+ "f1_amusement": 1017,
1020
+ "f1_anger": 1018,
1021
+ "f1_annoyance": 1019,
1022
+ "f1_approval": 1020,
1023
+ "f1_avg": 1021,
1024
+ "f1_caring": 1022,
1025
+ "f1_class_negative": 1023,
1026
+ "f1_class_positive": 1024,
1027
+ "f1_confusion": 1025,
1028
+ "f1_constructive": 1026,
1029
+ "f1_covid-19": 1027,
1030
+ "f1_curiosity": 1028,
1031
+ "f1_desire": 1029,
1032
+ "f1_disappointment": 1030,
1033
+ "f1_disapproval": 1031,
1034
+ "f1_disgust": 1032,
1035
+ "f1_embarrassment": 1033,
1036
+ "f1_entity_span": 1034,
1037
+ "f1_excitement": 1035,
1038
+ "f1_fear": 1036,
1039
+ "f1_gratitude": 1037,
1040
+ "f1_grief": 1038,
1041
+ "f1_healthy": 1039,
1042
+ "f1_instrument": 1040,
1043
+ "f1_joy": 1041,
1044
+ "f1_love": 1042,
1045
+ "f1_macro": 1043,
1046
+ "f1_macro_avg.": 1044,
1047
+ "f1_micro": 1045,
1048
+ "f1_micro_avg": 1046,
1049
+ "f1_negative": 1047,
1050
+ "f1_nervousness": 1048,
1051
+ "f1_neutral": 1049,
1052
+ "f1_nuclearity": 1050,
1053
+ "f1_optimism": 1051,
1054
+ "f1_positive": 1052,
1055
+ "f1_pride": 1053,
1056
+ "f1_r15": 1054,
1057
+ "f1_r16": 1055,
1058
+ "f1_realization": 1056,
1059
+ "f1_relation": 1057,
1060
+ "f1_relief": 1058,
1061
+ "f1_remorse": 1059,
1062
+ "f1_sadness": 1060,
1063
+ "f1_samples": 1061,
1064
+ "f1_score_%": 1062,
1065
+ "f1_score_5-fold": 1063,
1066
+ "f1_score_decimal": 1064,
1067
+ "f1_score_macro": 1065,
1068
+ "f1_score_macro_avg": 1066,
1069
+ "f1_score_micro": 1067,
1070
+ "f1_score_queue": 1068,
1071
+ "f1_score_strong_class": 1069,
1072
+ "f1_score_threshold=0.94": 1070,
1073
+ "f1_score_toxic_class": 1071,
1074
+ "f1_score_type": 1072,
1075
+ "f1_score_weighted": 1073,
1076
+ "f1_seqeval": 1074,
1077
+ "f1_span": 1075,
1078
+ "f1_stderr": 1076,
1079
+ "f1_surprise": 1077,
1080
+ "f1_symptomatic": 1078,
1081
+ "f1_target": 1079,
1082
+ "f1_test_2020": 1080,
1083
+ "f1_test_2021": 1081,
1084
+ "f1_threshold": 1082,
1085
+ "f1_trolling": 1083,
1086
+ "f1_verb": 1084,
1087
+ "f1_weighted": 1085,
1088
+ "f1_weighted_avg": 1086,
1089
+ "f1_weighted_quantized": 1087,
1090
+ "f1neg": 1088,
1091
+ "f1pos": 1089,
1092
+ "f2": 1090,
1093
+ "factspotter": 1091,
1094
+ "factual_accuracy": 1092,
1095
+ "fad": 1093,
1096
+ "fake_acc": 1094,
1097
+ "false_accuracy": 1095,
1098
+ "false_positive_rate": 1096,
1099
+ "far": 1097,
1100
+ "fast_1": 1098,
1101
+ "few-shot": 1099,
1102
+ "fid": 1100,
1103
+ "fid_flexvar-d16_+sar": 1101,
1104
+ "fid_flexvar-d20_+sar": 1102,
1105
+ "fid_flexvar-d24_+sar": 1103,
1106
+ "figure": 1104,
1107
+ "final_em": 1105,
1108
+ "final_eval_bertscore_f1": 1106,
1109
+ "final_eval_bleu": 1107,
1110
+ "final_eval_chrf": 1108,
1111
+ "final_eval_loss": 1109,
1112
+ "final_loss": 1110,
1113
+ "final_test_wer": 1111,
1114
+ "final_training_loss": 1112,
1115
+ "final_validation_loss": 1113,
1116
+ "finance_f1": 1114,
1117
+ "first_pass_exact_match": 1115,
1118
+ "first_turn": 1116,
1119
+ "fitness": 1117,
1120
+ "fl-all": 1118,
1121
+ "fleurs-test-bleu": 1119,
1122
+ "fleurs-test-cer": 1120,
1123
+ "fleurs-test-wer": 1121,
1124
+ "flexible-extract": 1122,
1125
+ "float32_cosine_accuracy@1": 1123,
1126
+ "float32_cosine_accuracy@10": 1124,
1127
+ "float32_cosine_accuracy@3": 1125,
1128
+ "float32_cosine_accuracy@5": 1126,
1129
+ "float32_cosine_map@100": 1127,
1130
+ "float32_cosine_mrr@10": 1128,
1131
+ "float32_cosine_ndcg@10": 1129,
1132
+ "float32_cosine_precision@1": 1130,
1133
+ "float32_cosine_precision@10": 1131,
1134
+ "float32_cosine_precision@3": 1132,
1135
+ "float32_cosine_precision@5": 1133,
1136
+ "float32_cosine_recall@1": 1134,
1137
+ "float32_cosine_recall@10": 1135,
1138
+ "float32_cosine_recall@3": 1136,
1139
+ "float32_cosine_recall@5": 1137,
1140
+ "fn": 1138,
1141
+ "focalloss": 1139,
1142
+ "format_compliance_rate": 1140,
1143
+ "fp": 1141,
1144
+ "fpr95": 1142,
1145
+ "fps": 1143,
1146
+ "fragmergent_coherence": 1144,
1147
+ "frame_accuracy": 1145,
1148
+ "framework_accuracy": 1146,
1149
+ "frr": 1147,
1150
+ "fscore": 1148,
1151
+ "function_call_accuracy": 1149,
1152
+ "function_calling_accuracy_name_&_arguments": 1150,
1153
+ "funny_class_accuracy": 1151,
1154
+ "fuzzy_score": 1152,
1155
+ "fvd16": 1153,
1156
+ "fw_iou": 1154,
1157
+ "g": 1155,
1158
+ "gen_len": 1156,
1159
+ "gender_acc": 1157,
1160
+ "gender_accuracy": 1158,
1161
+ "gender_consistency": 1159,
1162
+ "generated_length": 1160,
1163
+ "generating_communicative_text.f1_score": 1161,
1164
+ "generating_communicative_text.precision": 1162,
1165
+ "generating_communicative_text.recall": 1163,
1166
+ "generating_communicative_text.support": 1164,
1167
+ "generating_creative_text.f1_score": 1165,
1168
+ "generating_creative_text.precision": 1166,
1169
+ "generating_creative_text.recall": 1167,
1170
+ "generating_creative_text.support": 1168,
1171
+ "gflops": 1169,
1172
+ "global_accuracy": 1170,
1173
+ "global_strict_f1": 1171,
1174
+ "glue": 1172,
1175
+ "go": 1173,
1176
+ "google_speech_commands_v2_35": 1174,
1177
+ "gp_test": 1175,
1178
+ "gp_val": 1176,
1179
+ "gpqa": 1177,
1180
+ "gpt-3.5_score": 1178,
1181
+ "gpt-4": 1179,
1182
+ "gpt-4_as_judge": 1180,
1183
+ "gpt-4_score": 1181,
1184
+ "gpt-4_score_bbox": 1182,
1185
+ "gpt-score": 1183,
1186
+ "gpu_memory_usage_mb": 1184,
1187
+ "group_score": 1185,
1188
+ "grpo_accuracy": 1186,
1189
+ "gsm8k": 1187,
1190
+ "gsm8k_0-shot": 1188,
1191
+ "gsm8k_5-shot": 1189,
1192
+ "gsm8k_accuracy": 1190,
1193
+ "gsm8k_few-shot": 1191,
1194
+ "gsm8k_score": 1192,
1195
+ "hallucination_f1": 1193,
1196
+ "hallucination_rate": 1194,
1197
+ "hallucination_reduction_%": 1195,
1198
+ "hallucination_reduction_near-ood": 1196,
1199
+ "hamming_accuracy": 1197,
1200
+ "hamming_loss": 1198,
1201
+ "hamming_score": 1199,
1202
+ "hard": 1200,
1203
+ "harmbench_f1": 1201,
1204
+ "harmonic_mean": 1202,
1205
+ "harmony_and_consonance": 1203,
1206
+ "hasans_exact": 1204,
1207
+ "hasans_f1": 1205,
1208
+ "hasans_total": 1206,
1209
+ "healthcare_f1": 1207,
1210
+ "healthy_accuracy": 1208,
1211
+ "hebrew_answers": 1209,
1212
+ "hellaswag": 1210,
1213
+ "hellaswag_0-shot": 1211,
1214
+ "hellaswag_10-shot": 1212,
1215
+ "hellaswag_rc": 1213,
1216
+ "hellaswag_score": 1214,
1217
+ "hhem_consistency": 1215,
1218
+ "hit@10": 1216,
1219
+ "hit@5": 1217,
1220
+ "hits@1": 1218,
1221
+ "hle": 1219,
1222
+ "homework_problem.f1_score": 1220,
1223
+ "homework_problem.precision": 1221,
1224
+ "homework_problem.recall": 1222,
1225
+ "homework_problem.support": 1223,
1226
+ "hota": 1224,
1227
+ "hota_all": 1225,
1228
+ "human-gpt_detection_validation_loss": 1226,
1229
+ "human_%": 1227,
1230
+ "human_explanation_rating": 1228,
1231
+ "human_preference_elo_rating": 1229,
1232
+ "human_preference_rate": 1230,
1233
+ "human_preference_vs_elevenlabs": 1231,
1234
+ "humaneval": 1232,
1235
+ "humaneval_pass@1": 1233,
1236
+ "humanities": 1234,
1237
+ "iae": 1235,
1238
+ "icat_score": 1236,
1239
+ "icbhi_score": 1237,
1240
+ "idf1": 1238,
1241
+ "ifbench": 1239,
1242
+ "ifeval": 1240,
1243
+ "image-to-sound_r@100": 1241,
1244
+ "image-to-text_r@1": 1242,
1245
+ "image-to-text_r@10": 1243,
1246
+ "image-to-text_r@5": 1244,
1247
+ "image_retrieval_r@1": 1245,
1248
+ "imagenet_acc.": 1246,
1249
+ "imagenet_dist._shift.": 1247,
1250
+ "imagenet_top-1_accuracy": 1248,
1251
+ "imagereward": 1249,
1252
+ "implicit_social_group_reference_seqeval": 1250,
1253
+ "improvement": 1251,
1254
+ "in-1k_top-1_acc._%": 1252,
1255
+ "in-1k_zero-shot_top-1_acc._%": 1253,
1256
+ "inception_score": 1254,
1257
+ "inference-latency_ms/sample": 1255,
1258
+ "inference_latency_ms": 1256,
1259
+ "inference_speed": 1257,
1260
+ "inference_speed_sec": 1258,
1261
+ "inference_steps": 1259,
1262
+ "inference_success_rate": 1260,
1263
+ "inference_text/sec_a100_40gb_gpu_batch=128": 1261,
1264
+ "inference_text/sec_a100_40gb_gpu_batch=32": 1262,
1265
+ "inference_text/sec_a100_batch=64": 1263,
1266
+ "inference_text/sec_a10g_batch=128": 1264,
1267
+ "inference_text/sec_a10g_gpu_batch=128": 1265,
1268
+ "inference_time": 1266,
1269
+ "inference_time_ms": 1267,
1270
+ "information_retrieval": 1268,
1271
+ "information_search.f1_score": 1269,
1272
+ "information_search.precision": 1270,
1273
+ "information_search.recall": 1271,
1274
+ "information_search.support": 1272,
1275
+ "inst-level_loose-accuracy": 1273,
1276
+ "inst_level_loose_acc": 1274,
1277
+ "inst_level_strict_acc": 1275,
1278
+ "instruction-following-score": 1276,
1279
+ "instruction_accuracy": 1277,
1280
+ "instruction_level_loose_accuracy": 1278,
1281
+ "instruction_level_strict_accuracy": 1279,
1282
+ "int8_cosine_accuracy@1": 1280,
1283
+ "int8_cosine_accuracy@10": 1281,
1284
+ "int8_cosine_accuracy@3": 1282,
1285
+ "int8_cosine_accuracy@5": 1283,
1286
+ "int8_cosine_map@100": 1284,
1287
+ "int8_cosine_mrr@10": 1285,
1288
+ "int8_cosine_ndcg@10": 1286,
1289
+ "int8_cosine_precision@1": 1287,
1290
+ "int8_cosine_precision@10": 1288,
1291
+ "int8_cosine_precision@3": 1289,
1292
+ "int8_cosine_precision@5": 1290,
1293
+ "int8_cosine_recall@1": 1291,
1294
+ "int8_cosine_recall@10": 1292,
1295
+ "int8_cosine_recall@3": 1293,
1296
+ "int8_cosine_recall@5": 1294,
1297
+ "intent_accuracy": 1295,
1298
+ "intent_classification_macro_f1_%": 1296,
1299
+ "intercode-alfa": 1297,
1300
+ "internal_consistency": 1298,
1301
+ "internal_tag_leakage": 1299,
1302
+ "international_law": 1300,
1303
+ "interpolation_error": 1301,
1304
+ "intersection_over_union": 1302,
1305
+ "introductory_pass@1": 1303,
1306
+ "invalid_move_rate_imr": 1304,
1307
+ "iou": 1305,
1308
+ "iou_%": 1306,
1309
+ "iou_agricultural_land": 1307,
1310
+ "iou_bare_soil": 1308,
1311
+ "iou_brushwood": 1309,
1312
+ "iou_building": 1310,
1313
+ "iou_buildings": 1311,
1314
+ "iou_coniferous": 1312,
1315
+ "iou_deciduous": 1313,
1316
+ "iou_greenhouse": 1314,
1317
+ "iou_herbaceous_vegetation": 1315,
1318
+ "iou_impervious_surface": 1316,
1319
+ "iou_jaccard_index": 1317,
1320
+ "iou_pervious_surface": 1318,
1321
+ "iou_plowed_land": 1319,
1322
+ "iou_score": 1320,
1323
+ "iou_snow": 1321,
1324
+ "iou_swimming_pool": 1322,
1325
+ "iou_vineyard": 1323,
1326
+ "iou_water": 1324,
1327
+ "ip_partial_f1": 1325,
1328
+ "ip_strict_f1": 1326,
1329
+ "is": 1327,
1330
+ "isco_hierarchical_accuracy": 1328,
1331
+ "ise": 1329,
1332
+ "itae": 1330,
1333
+ "j&f": 1331,
1334
+ "j/p_accuracy": 1332,
1335
+ "jaccard": 1333,
1336
+ "jaccard_index": 1334,
1337
+ "jaccard_seen": 1335,
1338
+ "jeopardy": 1336,
1339
+ "jeopardy_gen2mc_mc": 1337,
1340
+ "joint_validation_accuracy": 1338,
1341
+ "jurisprudence": 1339,
1342
+ "kaggle_public_score_rmsle_best_submission": 1340,
1343
+ "kannada_wer": 1341,
1344
+ "kendall's_tau": 1342,
1345
+ "kendall's_tau-c": 1343,
1346
+ "kendall's_tau_coefficient": 1344,
1347
+ "kl_divergence": 1345,
1348
+ "korean_response_ratio": 1346,
1349
+ "kv_partial_f1": 1347,
1350
+ "kv_strict_f1": 1348,
1351
+ "l2_error": 1349,
1352
+ "l2q@15": 1350,
1353
+ "labeled_attachment_score_las": 1351,
1354
+ "labelled_attachment_score": 1352,
1355
+ "lambada": 1353,
1356
+ "lambada_acc": 1354,
1357
+ "lambada_ppl": 1355,
1358
+ "lambda": 1356,
1359
+ "las": 1357,
1360
+ "last_k_layers": 1358,
1361
+ "latency_full": 1359,
1362
+ "latency_in_seconds": 1360,
1363
+ "latency_merging_ms": 1361,
1364
+ "latency_ms": 1362,
1365
+ "latency_ms/token": 1363,
1366
+ "latency_ms_-_img": 1364,
1367
+ "latency_ms_-_txt": 1365,
1368
+ "latency_ms_img": 1366,
1369
+ "latency_ms_img+txt": 1367,
1370
+ "latency_ms_img_+_txt": 1368,
1371
+ "latency_ms_txt": 1369,
1372
+ "law_f1": 1370,
1373
+ "lb_de_accuracy": 1371,
1374
+ "lb_en_accuracy": 1372,
1375
+ "lb_fr_accuracy": 1373,
1376
+ "lbpp": 1374,
1377
+ "lc_win_rate": 1375,
1378
+ "lcr": 1376,
1379
+ "ldm3d-sr-b_depth_mare": 1377,
1380
+ "ldm3d-sr-b_fid": 1378,
1381
+ "ldm3d-sr-b_is": 1379,
1382
+ "ldm3d-sr-b_psnr": 1380,
1383
+ "ldm3d-sr-b_ssim": 1381,
1384
+ "lea": 1382,
1385
+ "ledgar": 1383,
1386
+ "lemma_accuracy": 1384,
1387
+ "lemma_f1": 1385,
1388
+ "length_controlled_winrate": 1386,
1389
+ "livecodebench": 1387,
1390
+ "loc_f1-score": 1388,
1391
+ "loc_precision": 1389,
1392
+ "loc_recall": 1390,
1393
+ "localization": 1391,
1394
+ "localization_ablation": 1392,
1395
+ "log-likelihood": 1393,
1396
+ "log-spectral_distance": 1394,
1397
+ "log_fold_change_mae": 1395,
1398
+ "log_loss": 1396,
1399
+ "logistic_regression_accuracy": 1397,
1400
+ "longbook_choice/acc": 1398,
1401
+ "longbook_qa/f1": 1399,
1402
+ "loss": 1400,
1403
+ "lowest_loss": 1401,
1404
+ "lpips": 1402,
1405
+ "lpips_score": 1403,
1406
+ "lrap": 1404,
1407
+ "lstq": 1405,
1408
+ "m3exam_acc": 1406,
1409
+ "macc": 1407,
1410
+ "macro": 1408,
1411
+ "macro-average_f1-score": 1409,
1412
+ "macro-averaged_f1": 1410,
1413
+ "macro-f1": 1411,
1414
+ "macro-precision": 1412,
1415
+ "macro-recall": 1413,
1416
+ "macro_accuracy": 1414,
1417
+ "macro_auc": 1415,
1418
+ "macro_avg": 1416,
1419
+ "macro_avg/acc": 1417,
1420
+ "macro_avg_f1-score": 1418,
1421
+ "macro_f1": 1419,
1422
+ "macro_f1-score": 1420,
1423
+ "macro_f1_10-fold": 1421,
1424
+ "macro_f1_3_conditions": 1422,
1425
+ "macro_f1_avg": 1423,
1426
+ "macro_f1_cardiffnlp/tweet_sentiment_multilingual/all": 1424,
1427
+ "macro_f1_cardiffnlp/tweet_topic_multi": 1425,
1428
+ "macro_f1_cardiffnlp/tweet_topic_single": 1426,
1429
+ "macro_f1_score": 1427,
1430
+ "macro_f1_test_2020": 1428,
1431
+ "macro_f1_test_2021": 1429,
1432
+ "macro_f1_top_5_conditions": 1430,
1433
+ "macro_f1_tweet_eval/emoji": 1431,
1434
+ "macro_f1_tweet_eval/emotion": 1432,
1435
+ "macro_f1_tweet_eval/hate": 1433,
1436
+ "macro_f1_tweet_eval/irony": 1434,
1437
+ "macro_f1_tweet_eval/offensive": 1435,
1438
+ "macro_f1_tweet_eval/sentiment": 1436,
1439
+ "macro_p": 1437,
1440
+ "macro_precision": 1438,
1441
+ "macro_precision_test_2020": 1439,
1442
+ "macro_precision_test_2021": 1440,
1443
+ "macro_r": 1441,
1444
+ "macro_recall": 1442,
1445
+ "macro_recall_test_2020": 1443,
1446
+ "macro_recall_test_2021": 1444,
1447
+ "macs_image+text_g": 1445,
1448
+ "mad": 1446,
1449
+ "mae": 1447,
1450
+ "mae_60_min": 1448,
1451
+ "mae_alpha": 1449,
1452
+ "mae_original_scale_-2_to_+2": 1450,
1453
+ "mae_original_scale_0-3": 1451,
1454
+ "main_score": 1452,
1455
+ "maj@1": 1453,
1456
+ "maj@16": 1454,
1457
+ "manhattan_accuracy": 1455,
1458
+ "manhattan_accuracy_threshold": 1456,
1459
+ "manhattan_ap": 1457,
1460
+ "manhattan_f1": 1458,
1461
+ "manhattan_f1_threshold": 1459,
1462
+ "manhattan_mcc": 1460,
1463
+ "manhattan_pearson": 1461,
1464
+ "manhattan_precision": 1462,
1465
+ "manhattan_recall": 1463,
1466
+ "manhattan_spearman": 1464,
1467
+ "map": 1465,
1468
+ "map50": 1466,
1469
+ "map50-95": 1467,
1470
+ "map@0.25": 1468,
1471
+ "map@0.5": 1469,
1472
+ "map@0.50": 1470,
1473
+ "map@0.5:0.95": 1471,
1474
+ "map@0.5_box": 1472,
1475
+ "map@0.5_mask": 1473,
1476
+ "map@0.75": 1474,
1477
+ "map@1": 1475,
1478
+ "map@10": 1476,
1479
+ "map@100": 1477,
1480
+ "map@1000": 1478,
1481
+ "map@1000_miracl": 1479,
1482
+ "map@100_miracl": 1480,
1483
+ "map@10_miracl": 1481,
1484
+ "map@1_miracl": 1482,
1485
+ "map@2": 1483,
1486
+ "map@20": 1484,
1487
+ "map@200": 1485,
1488
+ "map@20_miracl": 1486,
1489
+ "map@3": 1487,
1490
+ "map@30": 1488,
1491
+ "map@300": 1489,
1492
+ "map@3_miracl": 1490,
1493
+ "map@5": 1491,
1494
+ "map@50": 1492,
1495
+ "map@50-95": 1493,
1496
+ "map@500": 1494,
1497
+ "map@5_miracl": 1495,
1498
+ "map@7": 1496,
1499
+ "map@70": 1497,
1500
+ "map@700": 1498,
1501
+ "map@75": 1499,
1502
+ "map@_iou=0.50:0.95": 1500,
1503
+ "map_l": 1501,
1504
+ "map_m": 1502,
1505
+ "map_micro": 1503,
1506
+ "map_rn50": 1504,
1507
+ "map_s": 1505,
1508
+ "map_val": 1506,
1509
+ "map_vit-b/16": 1507,
1510
+ "maph/l2": 1508,
1511
+ "mare": 1509,
1512
+ "mask_ap": 1510,
1513
+ "matched": 1511,
1514
+ "math": 1512,
1515
+ "math_500": 1513,
1516
+ "math_level_5": 1514,
1517
+ "math_verify": 1515,
1518
+ "mathew's_coefficient": 1516,
1519
+ "matthews_correlation": 1517,
1520
+ "matthews_correlation_coefficient": 1518,
1521
+ "mauve": 1519,
1522
+ "max_accuracy": 1520,
1523
+ "max_accuracy_threshold": 1521,
1524
+ "max_ap": 1522,
1525
+ "max_error_alpha": 1523,
1526
+ "max_f1": 1524,
1527
+ "max_f1_threshold": 1525,
1528
+ "max_mcc": 1526,
1529
+ "max_precision": 1527,
1530
+ "max_recall": 1528,
1531
+ "max_reward": 1529,
1532
+ "maxfm": 1530,
1533
+ "maxsim_accuracy@1": 1531,
1534
+ "maxsim_accuracy@10": 1532,
1535
+ "maxsim_accuracy@3": 1533,
1536
+ "maxsim_accuracy@5": 1534,
1537
+ "maxsim_map@100": 1535,
1538
+ "maxsim_mrr@10": 1536,
1539
+ "maxsim_ndcg@10": 1537,
1540
+ "maxsim_precision@1": 1538,
1541
+ "maxsim_precision@10": 1539,
1542
+ "maxsim_precision@3": 1540,
1543
+ "maxsim_precision@5": 1541,
1544
+ "maxsim_recall@1": 1542,
1545
+ "maxsim_recall@10": 1543,
1546
+ "maxsim_recall@3": 1544,
1547
+ "maxsim_recall@5": 1545,
1548
+ "mbpp": 1546,
1549
+ "mbpp_pass@1": 1547,
1550
+ "mc1": 1548,
1551
+ "mc1_accuracy": 1549,
1552
+ "mc1_accuracy_stderr": 1550,
1553
+ "mc2": 1551,
1554
+ "mc2_accuracy": 1552,
1555
+ "mc2_accuracy_stderr": 1553,
1556
+ "mcap": 1554,
1557
+ "mcc": 1555,
1558
+ "mean": 1556,
1559
+ "mean-ep-length": 1557,
1560
+ "mean-reward": 1558,
1561
+ "mean@1": 1559,
1562
+ "mean_absolute_error": 1560,
1563
+ "mean_absolute_error_mae": 1561,
1564
+ "mean_accuracy": 1562,
1565
+ "mean_ap": 1563,
1566
+ "mean_auc@5\u00b0": 1564,
1567
+ "mean_average_precision": 1565,
1568
+ "mean_average_precision@iou_0.50": 1566,
1569
+ "mean_average_precision@iou_0.75": 1567,
1570
+ "mean_average_precision_iou=0.5": 1568,
1571
+ "mean_average_precision_iou=0.5:0.95": 1569,
1572
+ "mean_average_precision_map@50": 1570,
1573
+ "mean_average_precision_map@50-95": 1571,
1574
+ "mean_corruption_error_mce": 1572,
1575
+ "mean_dice": 1573,
1576
+ "mean_episode_length": 1574,
1577
+ "mean_error_px": 1575,
1578
+ "mean_f1_intermediate": 1576,
1579
+ "mean_iou": 1577,
1580
+ "mean_iou_class": 1578,
1581
+ "mean_opinion_score": 1579,
1582
+ "mean_opinion_score_mos": 1580,
1583
+ "mean_p_ai": 1581,
1584
+ "mean_rating": 1582,
1585
+ "mean_recall": 1583,
1586
+ "mean_reciprocal_rank": 1584,
1587
+ "mean_reconstruction_error_mm": 1585,
1588
+ "mean_regret_\u03b4wp_late_&_close": 1586,
1589
+ "mean_regret_\u03b4wp_overall": 1587,
1590
+ "mean_reward": 1588,
1591
+ "mean_reward_20_episodes": 1589,
1592
+ "mean_rmse_multi-head": 1590,
1593
+ "mean_ru": 1591,
1594
+ "mean_squared_error": 1592,
1595
+ "mean_squared_error_for_ordinal_data": 1593,
1596
+ "mean_token_accuracy": 1594,
1597
+ "median_absolute_error_mdae": 1595,
1598
+ "medical_keyword_coverage": 1596,
1599
+ "medical_q&a": 1597,
1600
+ "medmcqa_mc": 1598,
1601
+ "medqa_mc": 1599,
1602
+ "membrane": 1600,
1603
+ "memory_efficiency": 1601,
1604
+ "memory_efficiency_improvement_x": 1602,
1605
+ "memory_footprint_mb": 1603,
1606
+ "memory_peak_mb": 1604,
1607
+ "memory_reduction_vs_fp32_baseline_%": 1605,
1608
+ "mer": 1606,
1609
+ "meteor": 1607,
1610
+ "metric": 1608,
1611
+ "micro": 1609,
1612
+ "micro-f1": 1610,
1613
+ "micro-f1_score": 1611,
1614
+ "micro-f1_strong": 1612,
1615
+ "micro-precision": 1613,
1616
+ "micro-recall": 1614,
1617
+ "micro_auc": 1615,
1618
+ "micro_avg/rougel": 1616,
1619
+ "micro_f1": 1617,
1620
+ "micro_f1_cardiffnlp/tweet_sentiment_multilingual/all": 1618,
1621
+ "micro_f1_cardiffnlp/tweet_topic_multi": 1619,
1622
+ "micro_f1_cardiffnlp/tweet_topic_single": 1620,
1623
+ "micro_f1_optimized_thresholds": 1621,
1624
+ "micro_f1_score": 1622,
1625
+ "micro_f1_tweet_eval/emoji": 1623,
1626
+ "micro_f1_tweet_eval/emotion": 1624,
1627
+ "micro_f1_tweet_eval/hate": 1625,
1628
+ "micro_f1_tweet_eval/irony": 1626,
1629
+ "micro_f1_tweet_eval/offensive": 1627,
1630
+ "micro_f1_tweet_eval/sentiment": 1628,
1631
+ "micro_precision": 1629,
1632
+ "micro_recall": 1630,
1633
+ "min_reward": 1631,
1634
+ "miou": 1632,
1635
+ "miou_13_classes": 1633,
1636
+ "miou_6-fold": 1634,
1637
+ "miou_after_lora": 1635,
1638
+ "miou_before_lora": 1636,
1639
+ "miou_real": 1637,
1640
+ "miou_test": 1638,
1641
+ "miouparts": 1639,
1642
+ "misc_f1-score": 1640,
1643
+ "misc_precision": 1641,
1644
+ "misc_recall": 1642,
1645
+ "miscs_f1": 1643,
1646
+ "mixture_accuracy": 1644,
1647
+ "mlm_accuracy": 1645,
1648
+ "mmlu": 1646,
1649
+ "mmlu-pem_0-shot": 1647,
1650
+ "mmlu_5-shot": 1648,
1651
+ "mmlu_accuracy": 1649,
1652
+ "mmlu_high_school_european_history": 1650,
1653
+ "mmlu_high_school_us_history": 1651,
1654
+ "mmlu_high_school_world_history": 1652,
1655
+ "mmlu_humanities": 1653,
1656
+ "mmlu_jurisprudence": 1654,
1657
+ "mmlu_logical_fallacies": 1655,
1658
+ "mmlu_moral_disputes": 1656,
1659
+ "mmlu_other": 1657,
1660
+ "mmlu_overall": 1658,
1661
+ "mmlu_pro": 1659,
1662
+ "mmlu_pro_mc": 1660,
1663
+ "mmlu_score": 1661,
1664
+ "mmlu_social_sci.": 1662,
1665
+ "mmlu_stem": 1663,
1666
+ "mmmlu_de_de_0-shot": 1664,
1667
+ "mmmlu_de_de_5-shot": 1665,
1668
+ "model-parameter": 1666,
1669
+ "model-parameters-reduction_%": 1667,
1670
+ "model_loss": 1668,
1671
+ "model_score": 1669,
1672
+ "model_size_kb": 1670,
1673
+ "modelnet40_average": 1671,
1674
+ "molecule_uniqueness_rate": 1672,
1675
+ "morph_ufeats_accuracy": 1673,
1676
+ "morphology_f1": 1674,
1677
+ "mota": 1675,
1678
+ "mp-lpips": 1676,
1679
+ "mpjpe": 1677,
1680
+ "mprec": 1678,
1681
+ "mrr": 1679,
1682
+ "mrr@1": 1680,
1683
+ "mrr@10": 1681,
1684
+ "mrr@100": 1682,
1685
+ "mrr@1000": 1683,
1686
+ "mrr@2": 1684,
1687
+ "mrr@20": 1685,
1688
+ "mrr@200": 1686,
1689
+ "mrr@3": 1687,
1690
+ "mrr@30": 1688,
1691
+ "mrr@300": 1689,
1692
+ "mrr@5": 1690,
1693
+ "mrr@50": 1691,
1694
+ "mrr@500": 1692,
1695
+ "mrr@7": 1693,
1696
+ "mrr@70": 1694,
1697
+ "mrr@700": 1695,
1698
+ "mrr_1": 1696,
1699
+ "mrr_10": 1697,
1700
+ "mrr_5": 1698,
1701
+ "mrr_baseline": 1699,
1702
+ "mrr_on_abr_core_exam_chest": 1700,
1703
+ "mrr_with_bi-encoder": 1701,
1704
+ "mrr_with_full_pipeline": 1702,
1705
+ "mrr_with_instructions": 1703,
1706
+ "mse": 1704,
1707
+ "mse_loss": 1705,
1708
+ "mse_masked;_dims=x/y": 1706,
1709
+ "mt-bench": 1707,
1710
+ "mt-bench_score": 1708,
1711
+ "mt-bench_win_rate_adjusted_%": 1709,
1712
+ "mtbench": 1710,
1713
+ "multilabel_accuracy": 1711,
1714
+ "multilabel_roc_auc": 1712,
1715
+ "multipl_humaneval": 1713,
1716
+ "multipl_mbppp": 1714,
1717
+ "music_accuracy": 1715,
1718
+ "musicality": 1716,
1719
+ "mwap": 1717,
1720
+ "n_embd": 1718,
1721
+ "n_evaluation_episodes": 1719,
1722
+ "n_head": 1720,
1723
+ "n_layer": 1721,
1724
+ "n_samples": 1722,
1725
+ "n_test_samples": 1723,
1726
+ "naive_bayes_accuracy": 1724,
1727
+ "named_entity_linking_f_score": 1725,
1728
+ "named_entity_linking_precision": 1726,
1729
+ "named_entity_linking_recall": 1727,
1730
+ "naturalqs": 1728,
1731
+ "naturalqs_gen2mc_mc": 1729,
1732
+ "nauc_map@1000_diff1": 1730,
1733
+ "nauc_map@1000_diff1_miracl": 1731,
1734
+ "nauc_map@1000_max": 1732,
1735
+ "nauc_map@1000_max_miracl": 1733,
1736
+ "nauc_map@1000_std": 1734,
1737
+ "nauc_map@1000_std_miracl": 1735,
1738
+ "nauc_map@100_diff1": 1736,
1739
+ "nauc_map@100_diff1_miracl": 1737,
1740
+ "nauc_map@100_max": 1738,
1741
+ "nauc_map@100_max_miracl": 1739,
1742
+ "nauc_map@100_std": 1740,
1743
+ "nauc_map@100_std_miracl": 1741,
1744
+ "nauc_map@10_diff1": 1742,
1745
+ "nauc_map@10_diff1_miracl": 1743,
1746
+ "nauc_map@10_max": 1744,
1747
+ "nauc_map@10_max_miracl": 1745,
1748
+ "nauc_map@10_std": 1746,
1749
+ "nauc_map@10_std_miracl": 1747,
1750
+ "nauc_map@1_diff1": 1748,
1751
+ "nauc_map@1_diff1_miracl": 1749,
1752
+ "nauc_map@1_max": 1750,
1753
+ "nauc_map@1_max_miracl": 1751,
1754
+ "nauc_map@1_std": 1752,
1755
+ "nauc_map@1_std_miracl": 1753,
1756
+ "nauc_map@20_diff1": 1754,
1757
+ "nauc_map@20_diff1_miracl": 1755,
1758
+ "nauc_map@20_max": 1756,
1759
+ "nauc_map@20_max_miracl": 1757,
1760
+ "nauc_map@20_std": 1758,
1761
+ "nauc_map@20_std_miracl": 1759,
1762
+ "nauc_map@3_diff1": 1760,
1763
+ "nauc_map@3_diff1_miracl": 1761,
1764
+ "nauc_map@3_max": 1762,
1765
+ "nauc_map@3_max_miracl": 1763,
1766
+ "nauc_map@3_std": 1764,
1767
+ "nauc_map@3_std_miracl": 1765,
1768
+ "nauc_map@5_diff1": 1766,
1769
+ "nauc_map@5_diff1_miracl": 1767,
1770
+ "nauc_map@5_max": 1768,
1771
+ "nauc_map@5_max_miracl": 1769,
1772
+ "nauc_map@5_std": 1770,
1773
+ "nauc_map@5_std_miracl": 1771,
1774
+ "nauc_map_diff1": 1772,
1775
+ "nauc_map_max": 1773,
1776
+ "nauc_map_std": 1774,
1777
+ "nauc_mrr@1000_diff1": 1775,
1778
+ "nauc_mrr@1000_max": 1776,
1779
+ "nauc_mrr@1000_std": 1777,
1780
+ "nauc_mrr@100_diff1": 1778,
1781
+ "nauc_mrr@100_max": 1779,
1782
+ "nauc_mrr@100_std": 1780,
1783
+ "nauc_mrr@10_diff1": 1781,
1784
+ "nauc_mrr@10_max": 1782,
1785
+ "nauc_mrr@10_std": 1783,
1786
+ "nauc_mrr@1_diff1": 1784,
1787
+ "nauc_mrr@1_max": 1785,
1788
+ "nauc_mrr@1_std": 1786,
1789
+ "nauc_mrr@20_diff1": 1787,
1790
+ "nauc_mrr@20_max": 1788,
1791
+ "nauc_mrr@20_std": 1789,
1792
+ "nauc_mrr@3_diff1": 1790,
1793
+ "nauc_mrr@3_max": 1791,
1794
+ "nauc_mrr@3_std": 1792,
1795
+ "nauc_mrr@5_diff1": 1793,
1796
+ "nauc_mrr@5_max": 1794,
1797
+ "nauc_mrr@5_std": 1795,
1798
+ "nauc_mrr_diff1": 1796,
1799
+ "nauc_mrr_max": 1797,
1800
+ "nauc_mrr_std": 1798,
1801
+ "nauc_ndcg@1000_diff1": 1799,
1802
+ "nauc_ndcg@1000_diff1_miracl": 1800,
1803
+ "nauc_ndcg@1000_max": 1801,
1804
+ "nauc_ndcg@1000_max_miracl": 1802,
1805
+ "nauc_ndcg@1000_std": 1803,
1806
+ "nauc_ndcg@1000_std_miracl": 1804,
1807
+ "nauc_ndcg@100_diff1": 1805,
1808
+ "nauc_ndcg@100_diff1_miracl": 1806,
1809
+ "nauc_ndcg@100_max": 1807,
1810
+ "nauc_ndcg@100_max_miracl": 1808,
1811
+ "nauc_ndcg@100_std": 1809,
1812
+ "nauc_ndcg@100_std_miracl": 1810,
1813
+ "nauc_ndcg@10_diff1": 1811,
1814
+ "nauc_ndcg@10_diff1_miracl": 1812,
1815
+ "nauc_ndcg@10_max": 1813,
1816
+ "nauc_ndcg@10_max_miracl": 1814,
1817
+ "nauc_ndcg@10_std": 1815,
1818
+ "nauc_ndcg@10_std_miracl": 1816,
1819
+ "nauc_ndcg@1_diff1": 1817,
1820
+ "nauc_ndcg@1_diff1_miracl": 1818,
1821
+ "nauc_ndcg@1_max": 1819,
1822
+ "nauc_ndcg@1_max_miracl": 1820,
1823
+ "nauc_ndcg@1_std": 1821,
1824
+ "nauc_ndcg@1_std_miracl": 1822,
1825
+ "nauc_ndcg@20_diff1": 1823,
1826
+ "nauc_ndcg@20_diff1_miracl": 1824,
1827
+ "nauc_ndcg@20_max": 1825,
1828
+ "nauc_ndcg@20_max_miracl": 1826,
1829
+ "nauc_ndcg@20_std": 1827,
1830
+ "nauc_ndcg@20_std_miracl": 1828,
1831
+ "nauc_ndcg@3_diff1": 1829,
1832
+ "nauc_ndcg@3_diff1_miracl": 1830,
1833
+ "nauc_ndcg@3_max": 1831,
1834
+ "nauc_ndcg@3_max_miracl": 1832,
1835
+ "nauc_ndcg@3_std": 1833,
1836
+ "nauc_ndcg@3_std_miracl": 1834,
1837
+ "nauc_ndcg@5_diff1": 1835,
1838
+ "nauc_ndcg@5_diff1_miracl": 1836,
1839
+ "nauc_ndcg@5_max": 1837,
1840
+ "nauc_ndcg@5_max_miracl": 1838,
1841
+ "nauc_ndcg@5_std": 1839,
1842
+ "nauc_ndcg@5_std_miracl": 1840,
1843
+ "nauc_p@1000_diff1_miracl": 1841,
1844
+ "nauc_p@1000_max_miracl": 1842,
1845
+ "nauc_p@1000_std_miracl": 1843,
1846
+ "nauc_p@100_diff1_miracl": 1844,
1847
+ "nauc_p@100_max_miracl": 1845,
1848
+ "nauc_p@100_std_miracl": 1846,
1849
+ "nauc_p@10_diff1_miracl": 1847,
1850
+ "nauc_p@10_max_miracl": 1848,
1851
+ "nauc_p@10_std_miracl": 1849,
1852
+ "nauc_p@1_diff1_miracl": 1850,
1853
+ "nauc_p@1_max_miracl": 1851,
1854
+ "nauc_p@1_std_miracl": 1852,
1855
+ "nauc_p@20_diff1_miracl": 1853,
1856
+ "nauc_p@20_max_miracl": 1854,
1857
+ "nauc_p@20_std_miracl": 1855,
1858
+ "nauc_p@3_diff1_miracl": 1856,
1859
+ "nauc_p@3_max_miracl": 1857,
1860
+ "nauc_p@3_std_miracl": 1858,
1861
+ "nauc_p@5_diff1_miracl": 1859,
1862
+ "nauc_p@5_max_miracl": 1860,
1863
+ "nauc_p@5_std_miracl": 1861,
1864
+ "nauc_precision@1000_diff1": 1862,
1865
+ "nauc_precision@1000_max": 1863,
1866
+ "nauc_precision@1000_std": 1864,
1867
+ "nauc_precision@100_diff1": 1865,
1868
+ "nauc_precision@100_max": 1866,
1869
+ "nauc_precision@100_std": 1867,
1870
+ "nauc_precision@10_diff1": 1868,
1871
+ "nauc_precision@10_max": 1869,
1872
+ "nauc_precision@10_std": 1870,
1873
+ "nauc_precision@1_diff1": 1871,
1874
+ "nauc_precision@1_max": 1872,
1875
+ "nauc_precision@1_std": 1873,
1876
+ "nauc_precision@20_diff1": 1874,
1877
+ "nauc_precision@20_max": 1875,
1878
+ "nauc_precision@20_std": 1876,
1879
+ "nauc_precision@3_diff1": 1877,
1880
+ "nauc_precision@3_max": 1878,
1881
+ "nauc_precision@3_std": 1879,
1882
+ "nauc_precision@5_diff1": 1880,
1883
+ "nauc_precision@5_max": 1881,
1884
+ "nauc_precision@5_std": 1882,
1885
+ "nauc_recall@1000_diff1": 1883,
1886
+ "nauc_recall@1000_diff1_miracl": 1884,
1887
+ "nauc_recall@1000_max": 1885,
1888
+ "nauc_recall@1000_max_miracl": 1886,
1889
+ "nauc_recall@1000_std": 1887,
1890
+ "nauc_recall@1000_std_miracl": 1888,
1891
+ "nauc_recall@100_diff1": 1889,
1892
+ "nauc_recall@100_diff1_miracl": 1890,
1893
+ "nauc_recall@100_max": 1891,
1894
+ "nauc_recall@100_max_miracl": 1892,
1895
+ "nauc_recall@100_std": 1893,
1896
+ "nauc_recall@100_std_miracl": 1894,
1897
+ "nauc_recall@10_diff1": 1895,
1898
+ "nauc_recall@10_diff1_miracl": 1896,
1899
+ "nauc_recall@10_max": 1897,
1900
+ "nauc_recall@10_max_miracl": 1898,
1901
+ "nauc_recall@10_std": 1899,
1902
+ "nauc_recall@10_std_miracl": 1900,
1903
+ "nauc_recall@1_diff1": 1901,
1904
+ "nauc_recall@1_diff1_miracl": 1902,
1905
+ "nauc_recall@1_max": 1903,
1906
+ "nauc_recall@1_max_miracl": 1904,
1907
+ "nauc_recall@1_std": 1905,
1908
+ "nauc_recall@1_std_miracl": 1906,
1909
+ "nauc_recall@20_diff1": 1907,
1910
+ "nauc_recall@20_diff1_miracl": 1908,
1911
+ "nauc_recall@20_max": 1909,
1912
+ "nauc_recall@20_max_miracl": 1910,
1913
+ "nauc_recall@20_std": 1911,
1914
+ "nauc_recall@20_std_miracl": 1912,
1915
+ "nauc_recall@3_diff1": 1913,
1916
+ "nauc_recall@3_diff1_miracl": 1914,
1917
+ "nauc_recall@3_max": 1915,
1918
+ "nauc_recall@3_max_miracl": 1916,
1919
+ "nauc_recall@3_std": 1917,
1920
+ "nauc_recall@3_std_miracl": 1918,
1921
+ "nauc_recall@5_diff1": 1919,
1922
+ "nauc_recall@5_diff1_miracl": 1920,
1923
+ "nauc_recall@5_max": 1921,
1924
+ "nauc_recall@5_max_miracl": 1922,
1925
+ "nauc_recall@5_std": 1923,
1926
+ "nauc_recall@5_std_miracl": 1924,
1927
+ "ndcg": 1925,
1928
+ "ndcg@1": 1926,
1929
+ "ndcg@10": 1927,
1930
+ "ndcg@100": 1928,
1931
+ "ndcg@1000": 1929,
1932
+ "ndcg@1000_miracl": 1930,
1933
+ "ndcg@100_miracl": 1931,
1934
+ "ndcg@10_miracl": 1932,
1935
+ "ndcg@1_miracl": 1933,
1936
+ "ndcg@2": 1934,
1937
+ "ndcg@20": 1935,
1938
+ "ndcg@200": 1936,
1939
+ "ndcg@20_baseline": 1937,
1940
+ "ndcg@20_miracl": 1938,
1941
+ "ndcg@20_with_instructions": 1939,
1942
+ "ndcg@3": 1940,
1943
+ "ndcg@30": 1941,
1944
+ "ndcg@300": 1942,
1945
+ "ndcg@3_miracl": 1943,
1946
+ "ndcg@5": 1944,
1947
+ "ndcg@50": 1945,
1948
+ "ndcg@500": 1946,
1949
+ "ndcg@5_miracl": 1947,
1950
+ "ndcg@7": 1948,
1951
+ "ndcg@70": 1949,
1952
+ "ndcg@700": 1950,
1953
+ "nds": 1951,
1954
+ "ndtw_val_unseen": 1952,
1955
+ "negative_mse": 1953,
1956
+ "negatives": 1954,
1957
+ "ner_f1_score": 1955,
1958
+ "ner_f_score": 1956,
1959
+ "ner_precision": 1957,
1960
+ "ner_recall": 1958,
1961
+ "niqe": 1959,
1962
+ "nmi": 1960,
1963
+ "noans_exact": 1961,
1964
+ "noans_f1": 1962,
1965
+ "noans_total": 1963,
1966
+ "noc@85": 1964,
1967
+ "noc@90": 1965,
1968
+ "non-degradation_rate": 1966,
1969
+ "normalized_accuracy_acc_norm": 1967,
1970
+ "normalized_accuracy_stderr": 1968,
1971
+ "normalized_cer": 1969,
1972
+ "normalized_levenshtein_distance": 1970,
1973
+ "normalized_levenshtein_similarity": 1971,
1974
+ "normalized_return": 1972,
1975
+ "normalized_score_iqm_95%_ci": 1973,
1976
+ "normalized_wer": 1974,
1977
+ "note-level_f-measure-no-offset_fno": 1975,
1978
+ "noun_top5_map": 1976,
1979
+ "npv": 1977,
1980
+ "null_f1": 1978,
1981
+ "num_active_experts": 1979,
1982
+ "num_experts": 1980,
1983
+ "num_gpus": 1981,
1984
+ "num_tokens": 1982,
1985
+ "number_accuracy": 1983,
1986
+ "number_of_params": 1984,
1987
+ "number_of_tokens": 1985,
1988
+ "numbers_accuracy_%": 1986,
1989
+ "objaverse_average": 1987,
1990
+ "object_count": 1988,
1991
+ "object_persistence_\u22655_frames": 1989,
1992
+ "object_precision": 1990,
1993
+ "object_recall": 1991,
1994
+ "object_size": 1992,
1995
+ "off-domain_citations": 1993,
1996
+ "off_by_1_accuracy": 1994,
1997
+ "olmo_3-eval_code": 1995,
1998
+ "olmo_3-eval_genqa": 1996,
1999
+ "olmo_3-eval_math": 1997,
2000
+ "olmo_3-eval_mc_non-stem": 1998,
2001
+ "olmo_3-eval_mc_stem": 1999,
2002
+ "openbookqa": 2000,
2003
+ "openthaigpt": 2001,
2004
+ "org_f1-score": 2002,
2005
+ "org_precision": 2003,
2006
+ "org_recall": 2004,
2007
+ "organization_public_institution_or_collective_actor_seqeval": 2005,
2008
+ "original_accuracy": 2006,
2009
+ "oscillation_count": 2007,
2010
+ "other": 2008,
2011
+ "other_accuracy": 2009,
2012
+ "overall": 2010,
2013
+ "overall_accuarcy": 2011,
2014
+ "overall_accuracy": 2012,
2015
+ "overall_devops_accuracy": 2013,
2016
+ "overall_f1": 2014,
2017
+ "overall_f1_weighted_avg": 2015,
2018
+ "overall_iou": 2016,
2019
+ "overall_match": 2017,
2020
+ "overall_precision": 2018,
2021
+ "overall_precision_weighted_avg": 2019,
2022
+ "overall_recall": 2020,
2023
+ "overall_recall_weighted_avg": 2021,
2024
+ "overall_satisfaction_live": 2022,
2025
+ "overall_satisfaction_stress": 2023,
2026
+ "overall_score": 2024,
2027
+ "overall_success_rate": 2025,
2028
+ "overall_test_accuracy": 2026,
2029
+ "overall_wer": 2027,
2030
+ "overshoot_%": 2028,
2031
+ "p": 2029,
2032
+ "p-mrr": 2030,
2033
+ "p@1": 2031,
2034
+ "p@10": 2032,
2035
+ "p@1000_miracl": 2033,
2036
+ "p@100_miracl": 2034,
2037
+ "p@10_baseline": 2035,
2038
+ "p@10_miracl": 2036,
2039
+ "p@10_with_instructions": 2037,
2040
+ "p@1_miracl": 2038,
2041
+ "p@20": 2039,
2042
+ "p@20_miracl": 2040,
2043
+ "p@3_miracl": 2041,
2044
+ "p@5": 2042,
2045
+ "p@5_miracl": 2043,
2046
+ "p@m": 2044,
2047
+ "pairwise_accuracy": 2045,
2048
+ "paralux_accuracy": 2046,
2049
+ "parameter_count": 2047,
2050
+ "parameters": 2048,
2051
+ "params_img_m": 2049,
2052
+ "params_m_-_img": 2050,
2053
+ "params_m_-_txt": 2051,
2054
+ "params_m_img": 2052,
2055
+ "params_m_txt": 2053,
2056
+ "params_txt_m": 2054,
2057
+ "partial_score": 2055,
2058
+ "particles_accuracy_%": 2056,
2059
+ "partpq": 2057,
2060
+ "pass@1": 2058,
2061
+ "pass@10": 2059,
2062
+ "pass@100": 2060,
2063
+ "pass@100_t=0.8": 2061,
2064
+ "pass@10_java": 2062,
2065
+ "pass@10_javascript": 2063,
2066
+ "pass@10_python": 2064,
2067
+ "pass@10_t=0.8": 2065,
2068
+ "pass@16": 2066,
2069
+ "pass@1_0-shot_cot": 2067,
2070
+ "pass@1_avg16": 2068,
2071
+ "pass@1_code_generation": 2069,
2072
+ "pass@1_function_completion": 2070,
2073
+ "pass@1_java": 2071,
2074
+ "pass@1_javascript": 2072,
2075
+ "pass@1_multimodal": 2073,
2076
+ "pass@1_n=1_code_instruct": 2074,
2077
+ "pass@1_n=1_humaneval_greedy_instruct": 2075,
2078
+ "pass@1_n=1_humaneval_plus_greedy_instruct": 2076,
2079
+ "pass@1_n=1_mbpp_plus_0shot_instruct": 2077,
2080
+ "pass@1_n=1_mbpp_sanitized_0shot_instruct": 2078,
2081
+ "pass@1_overall": 2079,
2082
+ "pass@1_python": 2080,
2083
+ "pass@1_t=0.01": 2081,
2084
+ "pass@1_t=0.1": 2082,
2085
+ "pass@1_t=0.2": 2083,
2086
+ "pass@1_thresh=0.5": 2084,
2087
+ "pass@3": 2085,
2088
+ "pass@32": 2086,
2089
+ "pass@4": 2087,
2090
+ "pass@4_overall": 2088,
2091
+ "pck@0.2": 2089,
2092
+ "pck@0.3_ood": 2090,
2093
+ "pckh-0.5": 2091,
2094
+ "pckh@0.1": 2092,
2095
+ "peak_time_s": 2093,
2096
+ "pearson": 2094,
2097
+ "pearson's_r_distress": 2095,
2098
+ "pearson's_r_empathy": 2096,
2099
+ "pearson_correlation": 2097,
2100
+ "pearson_correlation_-_stsb_multi_mt_fr": 2098,
2101
+ "pearson_correlation_cosine_similarity": 2099,
2102
+ "pearson_cosine": 2100,
2103
+ "pearson_dot": 2101,
2104
+ "pearson_euclidean": 2102,
2105
+ "pearson_manhattan": 2103,
2106
+ "pearson_max": 2104,
2107
+ "pearson_spearman_avg": 2105,
2108
+ "pearsonr": 2106,
2109
+ "pearsonr_dynamic_8b": 2107,
2110
+ "pearsonr_onnx": 2108,
2111
+ "pearsonr_optimized": 2109,
2112
+ "pearsonr_static_8b": 2110,
2113
+ "per-class_accuracy": 2111,
2114
+ "per-joint_success_rate_5%_tolerance": 2112,
2115
+ "per_f1-score": 2113,
2116
+ "per_precision": 2114,
2117
+ "per_recall": 2115,
2118
+ "percent_parseable": 2116,
2119
+ "percentage_correct": 2117,
2120
+ "percentage_error": 2118,
2121
+ "percentile": 2119,
2122
+ "percision": 2120,
2123
+ "performance_index": 2121,
2124
+ "performance_semantic_search_6_datasets": 2122,
2125
+ "performance_sentence_embeddings_14_datasets": 2123,
2126
+ "perplexity": 2124,
2127
+ "perplexity_baseline": 2125,
2128
+ "perplexity_basic": 2126,
2129
+ "perplexity_best_checkpoint": 2127,
2130
+ "perplexity_gpt-2_baseline": 2128,
2131
+ "perplexity_ibce": 2129,
2132
+ "perplexity_mean_evaluation": 2130,
2133
+ "perplexity_wip": 2131,
2134
+ "perplexity_\u2193": 2132,
2135
+ "pesq": 2133,
2136
+ "phd_evaluation_score_/100": 2134,
2137
+ "phone_error_rate": 2135,
2138
+ "phoneme_error_rate": 2136,
2139
+ "phoneme_error_rate_per_%": 2137,
2140
+ "phoneme_group_error_rate": 2138,
2141
+ "physical_cores": 2139,
2142
+ "piqa": 2140,
2143
+ "piqa_mc": 2141,
2144
+ "pixel_accuracy": 2142,
2145
+ "placeholder_metric_for_development": 2143,
2146
+ "policy_agreement_late_&_close": 2144,
2147
+ "policy_agreement_top-\u03b4wp": 2145,
2148
+ "political_group_seqeval": 2146,
2149
+ "political_institution_seqeval": 2147,
2150
+ "pooling_attention_dropout": 2148,
2151
+ "pos-level0": 2149,
2152
+ "pos_upos_accuracy": 2150,
2153
+ "poseval": 2151,
2154
+ "positives": 2152,
2155
+ "ppl": 2153,
2156
+ "ppl_per_million_parameters": 2154,
2157
+ "ppv": 2155,
2158
+ "ppv_precision": 2156,
2159
+ "pq": 2157,
2160
+ "pqst": 2158,
2161
+ "pr-auc": 2159,
2162
+ "pr_auc": 2160,
2163
+ "pre@10": 2161,
2164
+ "prec@1": 2162,
2165
+ "precision": 2163,
2166
+ "precision-macro": 2164,
2167
+ "precision@1": 2165,
2168
+ "precision@10": 2166,
2169
+ "precision@100": 2167,
2170
+ "precision@1000": 2168,
2171
+ "precision@2": 2169,
2172
+ "precision@20": 2170,
2173
+ "precision@200": 2171,
2174
+ "precision@3": 2172,
2175
+ "precision@30": 2173,
2176
+ "precision@300": 2174,
2177
+ "precision@5": 2175,
2178
+ "precision@50": 2176,
2179
+ "precision@500": 2177,
2180
+ "precision@7": 2178,
2181
+ "precision@70": 2179,
2182
+ "precision@700": 2180,
2183
+ "precision_%": 2181,
2184
+ "precision_'bezeichnung'_macro": 2182,
2185
+ "precision_'thema'_macro": 2183,
2186
+ "precision_20-vote": 2184,
2187
+ "precision_af": 2185,
2188
+ "precision_class_negative": 2186,
2189
+ "precision_class_positive": 2187,
2190
+ "precision_entity_span": 2188,
2191
+ "precision_ham": 2189,
2192
+ "precision_macro": 2190,
2193
+ "precision_macro_avg": 2191,
2194
+ "precision_micro": 2192,
2195
+ "precision_micro_avg": 2193,
2196
+ "precision_ppv": 2194,
2197
+ "precision_rate": 2195,
2198
+ "precision_samples": 2196,
2199
+ "precision_spam": 2197,
2200
+ "precision_strong_class": 2198,
2201
+ "precision_test_2020": 2199,
2202
+ "precision_test_2021": 2200,
2203
+ "precision_threshold=0.94": 2201,
2204
+ "precision_weighted": 2202,
2205
+ "prediction_success_rate": 2203,
2206
+ "preference_accuracy": 2204,
2207
+ "prefill_latency_ms": 2205,
2208
+ "private_score": 2206,
2209
+ "processing_speed_tokens/sec": 2207,
2210
+ "professional_law": 2208,
2211
+ "proficiency_score": 2209,
2212
+ "prompt_compliance_rate_%": 2210,
2213
+ "prompt_level_loose_acc": 2211,
2214
+ "prompt_level_loose_accuracy": 2212,
2215
+ "prompt_level_strict_acc": 2213,
2216
+ "prompt_level_strict_accuracy": 2214,
2217
+ "proper_names_accuracy_%": 2215,
2218
+ "psnr": 2216,
2219
+ "psnr_srgb": 2217,
2220
+ "public_avg._f1": 2218,
2221
+ "public_score": 2219,
2222
+ "q3": 2220,
2223
+ "q8": 2221,
2224
+ "qa_accuracy": 2222,
2225
+ "qc_decision_accuracy": 2223,
2226
+ "query_active_dims": 2224,
2227
+ "query_sparsity_ratio": 2225,
2228
+ "question_pair_acc": 2226,
2229
+ "qwk": 2227,
2230
+ "r": 2228,
2231
+ "r-1_f1": 2229,
2232
+ "r-2_f1": 2230,
2233
+ "r-l_f1": 2231,
2234
+ "r-precision": 2232,
2235
+ "r-r2": 2233,
2236
+ "r-squared": 2234,
2237
+ "r1": 2235,
2238
+ "r1@0.5": 2236,
2239
+ "r2_score": 2237,
2240
+ "r@1": 2238,
2241
+ "r@10": 2239,
2242
+ "r@1_iou=0.3": 2240,
2243
+ "r@1_iou=0.5": 2241,
2244
+ "r@5": 2242,
2245
+ "r@m": 2243,
2246
+ "r_squared": 2244,
2247
+ "race-m": 2245,
2248
+ "radgraph_f1": 2246,
2249
+ "rank-1": 2247,
2250
+ "rank-1_accuracy_rn50": 2248,
2251
+ "rank-1_accuracy_vit-b/16": 2249,
2252
+ "rank-1_all_search": 2250,
2253
+ "rank_128-dim": 2251,
2254
+ "raw_score": 2252,
2255
+ "re+_micro_f1": 2253,
2256
+ "real_acc": 2254,
2257
+ "reasonable_miss_rate": 2255,
2258
+ "reasoning": 2256,
2259
+ "reasoning_accuracy": 2257,
2260
+ "reasoning_accuracy_%": 2258,
2261
+ "reasoning_alg.": 2259,
2262
+ "reasoning_quality_score": 2260,
2263
+ "recall": 2261,
2264
+ "recall-macro": 2262,
2265
+ "recall@1": 2263,
2266
+ "recall@10": 2264,
2267
+ "recall@100": 2265,
2268
+ "recall@1000": 2266,
2269
+ "recall@1000_miracl": 2267,
2270
+ "recall@100_miracl": 2268,
2271
+ "recall@10_miracl": 2269,
2272
+ "recall@1_%": 2270,
2273
+ "recall@1_hn-atom_uc": 2271,
2274
+ "recall@1_miracl": 2272,
2275
+ "recall@2": 2273,
2276
+ "recall@20": 2274,
2277
+ "recall@200": 2275,
2278
+ "recall@20_miracl": 2276,
2279
+ "recall@3": 2277,
2280
+ "recall@30": 2278,
2281
+ "recall@300": 2279,
2282
+ "recall@3_miracl": 2280,
2283
+ "recall@5": 2281,
2284
+ "recall@50": 2282,
2285
+ "recall@500": 2283,
2286
+ "recall@5_miracl": 2284,
2287
+ "recall@7": 2285,
2288
+ "recall@70": 2286,
2289
+ "recall@700": 2287,
2290
+ "recall_%": 2288,
2291
+ "recall_'bezeichnung'_macro": 2289,
2292
+ "recall_'thema'_macro": 2290,
2293
+ "recall_20-vote": 2291,
2294
+ "recall_af": 2292,
2295
+ "recall_class_negative": 2293,
2296
+ "recall_class_positive": 2294,
2297
+ "recall_crisis_detection_rate": 2295,
2298
+ "recall_entity_span": 2296,
2299
+ "recall_ham": 2297,
2300
+ "recall_macro": 2298,
2301
+ "recall_macro_avg": 2299,
2302
+ "recall_micro": 2300,
2303
+ "recall_micro_avg": 2301,
2304
+ "recall_samples": 2302,
2305
+ "recall_sensitivity": 2303,
2306
+ "recall_spam": 2304,
2307
+ "recall_strong_class": 2305,
2308
+ "recall_test_2020": 2306,
2309
+ "recall_test_2021": 2307,
2310
+ "recall_threshold=0.94": 2308,
2311
+ "recall_tpr": 2309,
2312
+ "recall_weighted": 2310,
2313
+ "recognition-of-done": 2311,
2314
+ "recognition_events": 2312,
2315
+ "refusal_rate": 2313,
2316
+ "relative_direction": 2314,
2317
+ "relative_distance": 2315,
2318
+ "relative_polarity_precision": 2316,
2319
+ "remaining": 2317,
2320
+ "repetition/looping_prevalence": 2318,
2321
+ "reranking_4_datasets": 2319,
2322
+ "response_relevance": 2320,
2323
+ "response_time_ms": 2321,
2324
+ "response_token_reduction": 2322,
2325
+ "results_partial_f1": 2323,
2326
+ "retention_%": 2324,
2327
+ "retrieval_8_datasets": 2325,
2328
+ "reward_gap": 2326,
2329
+ "rhythmic_presence_and_stability": 2327,
2330
+ "rise_time_s": 2328,
2331
+ "risk-reward_ratio": 2329,
2332
+ "rmse": 2330,
2333
+ "rmse_alpha": 2331,
2334
+ "rmse_cooperative": 2332,
2335
+ "rmse_delta_cola_to_final": 2333,
2336
+ "rmse_delta_perplexity_to_final_large": 2334,
2337
+ "rmse_iter_to_final_simplified": 2335,
2338
+ "rmse_m": 2336,
2339
+ "rmse_original_scale_-2_to_+2": 2337,
2340
+ "rmse_original_scale_0-3": 2338,
2341
+ "rmse_robbert_delta_blurb_to_final": 2339,
2342
+ "robustness_score": 2340,
2343
+ "roc": 2341,
2344
+ "roc-auc": 2342,
2345
+ "roc-auc_macro": 2343,
2346
+ "roc-auc_std_dev": 2344,
2347
+ "roc_auc": 2345,
2348
+ "roc_auc_macro": 2346,
2349
+ "roc_auc_micro": 2347,
2350
+ "roc_auc_samples": 2348,
2351
+ "roc_auc_weighted": 2349,
2352
+ "rogue1": 2350,
2353
+ "roleplay": 2351,
2354
+ "room_size": 2352,
2355
+ "root_mean_squared_error": 2353,
2356
+ "rouge": 2354,
2357
+ "rouge-1": 2355,
2358
+ "rouge-1-f1": 2356,
2359
+ "rouge-1-precision": 2357,
2360
+ "rouge-1-recall": 2358,
2361
+ "rouge-1_f1": 2359,
2362
+ "rouge-1_improvement": 2360,
2363
+ "rouge-1_score": 2361,
2364
+ "rouge-2": 2362,
2365
+ "rouge-2-f1": 2363,
2366
+ "rouge-2-precision": 2364,
2367
+ "rouge-2-recall": 2365,
2368
+ "rouge-2_f1": 2366,
2369
+ "rouge-2_improvement": 2367,
2370
+ "rouge-l-f1": 2368,
2371
+ "rouge-l-precision": 2369,
2372
+ "rouge-l-recall": 2370,
2373
+ "rouge-l_f1": 2371,
2374
+ "rouge-l_improvement": 2372,
2375
+ "rouge-l_qa": 2373,
2376
+ "rouge-l_score": 2374,
2377
+ "rouge-lsum": 2375,
2378
+ "rouge1": 2376,
2379
+ "rouge1_acc": 2377,
2380
+ "rouge1_diff": 2378,
2381
+ "rouge1_max": 2379,
2382
+ "rouge2": 2380,
2383
+ "rouge2_acc": 2381,
2384
+ "rouge2_diff": 2382,
2385
+ "rouge2_max": 2383,
2386
+ "rouge_l": 2384,
2387
+ "rouge_score": 2385,
2388
+ "rougel_acc": 2386,
2389
+ "rougel_diff": 2387,
2390
+ "rougel_max": 2388,
2391
+ "rougelsum": 2389,
2392
+ "route_plan": 2390,
2393
+ "route_quality_score": 2391,
2394
+ "row_non_zero_mean_corpus": 2392,
2395
+ "row_non_zero_mean_query": 2393,
2396
+ "row_sparsity_mean_corpus": 2394,
2397
+ "row_sparsity_mean_query": 2395,
2398
+ "rss_score_7500tok_on_a100_gpu": 2396,
2399
+ "runtime": 2397,
2400
+ "runtime_sec": 2398,
2401
+ "r\u00b2": 2399,
2402
+ "r\u00b2_delta_cola_to_final": 2400,
2403
+ "r\u00b2_delta_perplexity_to_final_large": 2401,
2404
+ "r\u00b2_iter_to_final_simplified": 2402,
2405
+ "r\u00b2_robbert_delta_blurb_to_final": 2403,
2406
+ "s-measure": 2404,
2407
+ "s/n_accuracy": 2405,
2408
+ "sacrebleu": 2406,
2409
+ "sacrebleu_chrf": 2407,
2410
+ "safety_score": 2408,
2411
+ "sample_size": 2409,
2412
+ "samples": 2410,
2413
+ "samples_per_second": 2411,
2414
+ "sanskrit/pali_terms_accuracy_%": 2412,
2415
+ "sanskrit_to_english_translation_-_bleu_score": 2413,
2416
+ "sanskrit_to_english_translation_-_jaccard_similarity": 2414,
2417
+ "sari_easse>=0.2.1": 2415,
2418
+ "scicode": 2416,
2419
+ "sciq_mc": 2417,
2420
+ "score": 2418,
2421
+ "sdr": 2419,
2422
+ "sdr_avg": 2420,
2423
+ "second_turn": 2421,
2424
+ "secondary_structure_3-states": 2422,
2425
+ "secondary_structure_8-states": 2423,
2426
+ "seen_samples_b": 2424,
2427
+ "self-reported": 2425,
2428
+ "semantic_similarity": 2426,
2429
+ "semclass_f1": 2427,
2430
+ "sen": 2428,
2431
+ "sensitivity": 2429,
2432
+ "sensitivity_recall": 2430,
2433
+ "sentence_sacrebleu": 2431,
2434
+ "sentences_f-score": 2432,
2435
+ "sequences": 2433,
2436
+ "settling_time_95%": 2434,
2437
+ "settling_time_s": 2435,
2438
+ "shape_bias": 2436,
2439
+ "si-sdr": 2437,
2440
+ "si-sdri": 2438,
2441
+ "sib-200_lb_accuracy": 2439,
2442
+ "sign_accuracy_3-class": 2440,
2443
+ "silhouette_cosine": 2441,
2444
+ "silhouette_euclidean": 2442,
2445
+ "silhouette_score": 2443,
2446
+ "silma_ragqa_benchmark_score": 2444,
2447
+ "similarity_accuracy": 2445,
2448
+ "similarity_accuracy_threshold": 2446,
2449
+ "similarity_ap": 2447,
2450
+ "similarity_f1": 2448,
2451
+ "similarity_f1_threshold": 2449,
2452
+ "similarity_precision": 2450,
2453
+ "similarity_recall": 2451,
2454
+ "single-line_infilling_pass@1": 2452,
2455
+ "single-line_infilling_pass@10": 2453,
2456
+ "single_choice": 2454,
2457
+ "single_line": 2455,
2458
+ "size": 2456,
2459
+ "slot_f1_micro": 2457,
2460
+ "slot_f1_score": 2458,
2461
+ "slot_precision_micro": 2459,
2462
+ "slot_recall_micro": 2460,
2463
+ "smiles_validity_rate": 2461,
2464
+ "smoothed_bleu-4": 2462,
2465
+ "smotsa": 2463,
2466
+ "social_group_seqeval": 2464,
2467
+ "social_science": 2465,
2468
+ "socialiqa_mc": 2466,
2469
+ "soft-f1": 2467,
2470
+ "software_development.f1_score": 2468,
2471
+ "software_development.precision": 2469,
2472
+ "software_development.recall": 2470,
2473
+ "software_development.support": 2471,
2474
+ "solution_exact_match": 2472,
2475
+ "span-based_f1": 2473,
2476
+ "sparse_acc": 2474,
2477
+ "sparsity": 2475,
2478
+ "sparsity_ratio": 2476,
2479
+ "speaker_similarity": 2477,
2480
+ "spearman": 2478,
2481
+ "spearman's_rho": 2479,
2482
+ "spearman's_\u03c1": 2480,
2483
+ "spearman_ar-ar": 2481,
2484
+ "spearman_correlation": 2482,
2485
+ "spearman_correlation_cosine_similarity": 2483,
2486
+ "spearman_cosine": 2484,
2487
+ "spearman_dot": 2485,
2488
+ "spearman_en-ar": 2486,
2489
+ "spearman_en-de": 2487,
2490
+ "spearman_en-en": 2488,
2491
+ "spearman_en-tr": 2489,
2492
+ "spearman_es-en": 2490,
2493
+ "spearman_es-es": 2491,
2494
+ "spearman_euclidean": 2492,
2495
+ "spearman_fr-en": 2493,
2496
+ "spearman_it-en": 2494,
2497
+ "spearman_ko-ko": 2495,
2498
+ "spearman_main_score": 2496,
2499
+ "spearman_manhattan": 2497,
2500
+ "spearman_max": 2498,
2501
+ "spearman_nl-en": 2499,
2502
+ "spearmanr": 2500,
2503
+ "spearmanr_dynamic_8b": 2501,
2504
+ "spearmanr_onnx": 2502,
2505
+ "spearmanr_optimized": 2503,
2506
+ "spearmanr_static_8b": 2504,
2507
+ "specificity": 2505,
2508
+ "speech_accuracy": 2506,
2509
+ "speedup_vs_fp32_baseline_x": 2507,
2510
+ "spice": 2508,
2511
+ "spl_test_unseen": 2509,
2512
+ "spl_val": 2510,
2513
+ "spl_val_unseen": 2511,
2514
+ "squad": 2512,
2515
+ "squad_em": 2513,
2516
+ "squad_f1": 2514,
2517
+ "squad_gen2mc_mc": 2515,
2518
+ "sr": 2516,
2519
+ "sr_test_unseen": 2517,
2520
+ "sr_val": 2518,
2521
+ "sr_val_unseen": 2519,
2522
+ "src2trg_accuracy": 2520,
2523
+ "ssim": 2521,
2524
+ "ssim_srgb": 2522,
2525
+ "sta": 2523,
2526
+ "stage_match_score": 2524,
2527
+ "standard_parseval_full": 2525,
2528
+ "static_error": 2526,
2529
+ "std_reward": 2527,
2530
+ "stem": 2528,
2531
+ "step_best_checkpoint": 2529,
2532
+ "steps_per_second": 2530,
2533
+ "strict-match": 2531,
2534
+ "strict_accuracy": 2532,
2535
+ "strict_prompt": 2533,
2536
+ "structured_output_compliance": 2534,
2537
+ "sts_8_datasets": 2535,
2538
+ "stsbenchmark": 2536,
2539
+ "style_llm-judge_1-3": 2537,
2540
+ "style_meter_greedy_pass_rate": 2538,
2541
+ "subj_f1": 2539,
2542
+ "subj_p": 2540,
2543
+ "subj_r": 2541,
2544
+ "subset-accuracy": 2542,
2545
+ "subset_accuracy": 2543,
2546
+ "success_rate": 2544,
2547
+ "success_rate_%": 2545,
2548
+ "swe-bench_verified": 2546,
2549
+ "symptomatic_accuracy": 2547,
2550
+ "system_score": 2548,
2551
+ "t/f_accuracy": 2549,
2552
+ "tag_xpos_accuracy": 2550,
2553
+ "tar@far=0.0001": 2551,
2554
+ "target_f1": 2552,
2555
+ "target_rounds": 2553,
2556
+ "task_1": 2554,
2557
+ "task_2": 2555,
2558
+ "task_3": 2556,
2559
+ "task_4": 2557,
2560
+ "task_completion_rate_improvement": 2558,
2561
+ "tau2": 2559,
2562
+ "telugu_wer": 2560,
2563
+ "tempo_match": 2561,
2564
+ "ter": 2562,
2565
+ "terminalbench_hard": 2563,
2566
+ "test": 2564,
2567
+ "test/f1": 2565,
2568
+ "test16_cer": 2566,
2569
+ "test16_wer": 2567,
2570
+ "test20_cer": 2568,
2571
+ "test20_wer": 2569,
2572
+ "test_1-shot_rougel": 2570,
2573
+ "test_accent_accuracy": 2571,
2574
+ "test_accuracy": 2572,
2575
+ "test_accuracy_logistic_regression": 2573,
2576
+ "test_accuracy_on_coscan_speech": 2574,
2577
+ "test_accuracy_original_data": 2575,
2578
+ "test_accuracy_svc": 2576,
2579
+ "test_accuracy_svc_linear": 2577,
2580
+ "test_age_accuracy": 2578,
2581
+ "test_ap": 2579,
2582
+ "test_auc": 2580,
2583
+ "test_bertscore": 2581,
2584
+ "test_bertscore_fanpage": 2582,
2585
+ "test_bertscore_ilpost": 2583,
2586
+ "test_bleu": 2584,
2587
+ "test_bleu_bg->en": 2585,
2588
+ "test_bleu_cs->en": 2586,
2589
+ "test_bleu_da->en": 2587,
2590
+ "test_bleu_de->en": 2588,
2591
+ "test_bleu_el->en": 2589,
2592
+ "test_bleu_en->bg": 2590,
2593
+ "test_bleu_en->cs": 2591,
2594
+ "test_bleu_en->da": 2592,
2595
+ "test_bleu_en->de": 2593,
2596
+ "test_bleu_en->el": 2594,
2597
+ "test_bleu_en->es": 2595,
2598
+ "test_bleu_en->et": 2596,
2599
+ "test_bleu_en->fi": 2597,
2600
+ "test_bleu_en->fr": 2598,
2601
+ "test_bleu_en->hr": 2599,
2602
+ "test_bleu_en->hu": 2600,
2603
+ "test_bleu_en->it": 2601,
2604
+ "test_bleu_en->lt": 2602,
2605
+ "test_bleu_en->lv": 2603,
2606
+ "test_bleu_en->mt": 2604,
2607
+ "test_bleu_en->nl": 2605,
2608
+ "test_bleu_en->pl": 2606,
2609
+ "test_bleu_en->pt": 2607,
2610
+ "test_bleu_en->ro": 2608,
2611
+ "test_bleu_en->ru": 2609,
2612
+ "test_bleu_en->sk": 2610,
2613
+ "test_bleu_en->sl": 2611,
2614
+ "test_bleu_en->sv": 2612,
2615
+ "test_bleu_en->uk": 2613,
2616
+ "test_bleu_es->en": 2614,
2617
+ "test_bleu_et->en": 2615,
2618
+ "test_bleu_fi->en": 2616,
2619
+ "test_bleu_fr->en": 2617,
2620
+ "test_bleu_hr->en": 2618,
2621
+ "test_bleu_hu->en": 2619,
2622
+ "test_bleu_it->en": 2620,
2623
+ "test_bleu_lt->en": 2621,
2624
+ "test_bleu_lv->en": 2622,
2625
+ "test_bleu_mt->en": 2623,
2626
+ "test_bleu_nl->en": 2624,
2627
+ "test_bleu_pl->en": 2625,
2628
+ "test_bleu_pt->en": 2626,
2629
+ "test_bleu_ro->en": 2627,
2630
+ "test_bleu_ru->en": 2628,
2631
+ "test_bleu_sk->en": 2629,
2632
+ "test_bleu_sl->en": 2630,
2633
+ "test_bleu_sv->en": 2631,
2634
+ "test_bleu_taigi->mandrin": 2632,
2635
+ "test_bleu_uk->en": 2633,
2636
+ "test_bokm\u00e5l_cer": 2634,
2637
+ "test_bokm\u00e5l_wer": 2635,
2638
+ "test_cer": 2636,
2639
+ "test_cer_%": 2637,
2640
+ "test_cer_+lm": 2638,
2641
+ "test_cer_mandrin": 2639,
2642
+ "test_cer_no_lm": 2640,
2643
+ "test_cer_using_lm": 2641,
2644
+ "test_cer_w/o_stress": 2642,
2645
+ "test_cer_with_lm": 2643,
2646
+ "test_cer_without_lm": 2644,
2647
+ "test_cher": 2645,
2648
+ "test_comet_bg->en": 2646,
2649
+ "test_comet_cs->en": 2647,
2650
+ "test_comet_da->en": 2648,
2651
+ "test_comet_de->en": 2649,
2652
+ "test_comet_el->en": 2650,
2653
+ "test_comet_en->bg": 2651,
2654
+ "test_comet_en->cs": 2652,
2655
+ "test_comet_en->da": 2653,
2656
+ "test_comet_en->de": 2654,
2657
+ "test_comet_en->el": 2655,
2658
+ "test_comet_en->es": 2656,
2659
+ "test_comet_en->et": 2657,
2660
+ "test_comet_en->fi": 2658,
2661
+ "test_comet_en->fr": 2659,
2662
+ "test_comet_en->hr": 2660,
2663
+ "test_comet_en->hu": 2661,
2664
+ "test_comet_en->it": 2662,
2665
+ "test_comet_en->lt": 2663,
2666
+ "test_comet_en->lv": 2664,
2667
+ "test_comet_en->mt": 2665,
2668
+ "test_comet_en->nl": 2666,
2669
+ "test_comet_en->pl": 2667,
2670
+ "test_comet_en->pt": 2668,
2671
+ "test_comet_en->ro": 2669,
2672
+ "test_comet_en->ru": 2670,
2673
+ "test_comet_en->sk": 2671,
2674
+ "test_comet_en->sl": 2672,
2675
+ "test_comet_en->sv": 2673,
2676
+ "test_comet_en->uk": 2674,
2677
+ "test_comet_es->en": 2675,
2678
+ "test_comet_et->en": 2676,
2679
+ "test_comet_fi->en": 2677,
2680
+ "test_comet_fr->en": 2678,
2681
+ "test_comet_hr->en": 2679,
2682
+ "test_comet_hu->en": 2680,
2683
+ "test_comet_it->en": 2681,
2684
+ "test_comet_lt->en": 2682,
2685
+ "test_comet_lv->en": 2683,
2686
+ "test_comet_mt->en": 2684,
2687
+ "test_comet_nl->en": 2685,
2688
+ "test_comet_pl->en": 2686,
2689
+ "test_comet_pt->en": 2687,
2690
+ "test_comet_ro->en": 2688,
2691
+ "test_comet_ru->en": 2689,
2692
+ "test_comet_sk->en": 2690,
2693
+ "test_comet_sl->en": 2691,
2694
+ "test_comet_sv->en": 2692,
2695
+ "test_comet_uk->en": 2693,
2696
+ "test_coraa_wer": 2694,
2697
+ "test_custom_cer_ctc": 2695,
2698
+ "test_custom_cer_rnnt": 2696,
2699
+ "test_custom_wer_ctc": 2697,
2700
+ "test_custom_wer_rnnt": 2698,
2701
+ "test_cver": 2699,
2702
+ "test_der": 2700,
2703
+ "test_em": 2701,
2704
+ "test_exact_match": 2702,
2705
+ "test_f1": 2703,
2706
+ "test_f1-score": 2704,
2707
+ "test_f1_callsign": 2705,
2708
+ "test_f1_command": 2706,
2709
+ "test_f1_macro": 2707,
2710
+ "test_f1_micro_on_coscan_speech": 2708,
2711
+ "test_f1_score": 2709,
2712
+ "test_f1_score_macro": 2710,
2713
+ "test_f1_score_weighted": 2711,
2714
+ "test_f1_value": 2712,
2715
+ "test_jaccard_error_rate": 2713,
2716
+ "test_loss": 2714,
2717
+ "test_macro_f1": 2715,
2718
+ "test_map": 2716,
2719
+ "test_mer": 2717,
2720
+ "test_micro_f1": 2718,
2721
+ "test_noresqa-mos_in-domain_training": 2719,
2722
+ "test_nynorsk_cer": 2720,
2723
+ "test_nynorsk_wer": 2721,
2724
+ "test_pearson_correlation_coefficient": 2722,
2725
+ "test_per": 2723,
2726
+ "test_per_in-domain_training_|": 2724,
2727
+ "test_per_on_common_voice_fr_13.0_|_trained": 2725,
2728
+ "test_per_on_multilingual_librispeech_fr_|_trained": 2726,
2729
+ "test_per_w/o_stress": 2727,
2730
+ "test_perplexity": 2728,
2731
+ "test_pr-auc": 2729,
2732
+ "test_precision": 2730,
2733
+ "test_precision_macro": 2731,
2734
+ "test_qwk": 2732,
2735
+ "test_recall": 2733,
2736
+ "test_recall_macro": 2734,
2737
+ "test_roc-auc": 2735,
2738
+ "test_rogue-1": 2736,
2739
+ "test_rogue-2": 2737,
2740
+ "test_rogue-l": 2738,
2741
+ "test_rogue-lsum": 2739,
2742
+ "test_rouge-1": 2740,
2743
+ "test_rouge-2": 2741,
2744
+ "test_rouge-l": 2742,
2745
+ "test_rouge-l_sum": 2743,
2746
+ "test_rouge1": 2744,
2747
+ "test_rouge1_fanpage": 2745,
2748
+ "test_rouge1_ilpost": 2746,
2749
+ "test_rouge2": 2747,
2750
+ "test_rouge2_fanpage": 2748,
2751
+ "test_rouge2_ilpost": 2749,
2752
+ "test_rougel": 2750,
2753
+ "test_rougel_fanpage": 2751,
2754
+ "test_rougel_ilpost": 2752,
2755
+ "test_runtime": 2753,
2756
+ "test_samples_per_second": 2754,
2757
+ "test_ser": 2755,
2758
+ "test_set_pass@1": 2756,
2759
+ "test_spearmanr": 2757,
2760
+ "test_squim-stoi_in-domain_training": 2758,
2761
+ "test_steps_per_second": 2759,
2762
+ "test_stoi_in-domain_training": 2760,
2763
+ "test_suite_sql_eval_-_exact_matching_accuracy": 2761,
2764
+ "test_suite_sql_eval_-_execution_accuracy": 2762,
2765
+ "test_weighted_accuracy": 2763,
2766
+ "test_wer": 2764,
2767
+ "test_wer_+lm": 2765,
2768
+ "test_wer_960ms_chunk_size_4_left_context_chunks": 2766,
2769
+ "test_wer_bg": 2767,
2770
+ "test_wer_cs": 2768,
2771
+ "test_wer_da": 2769,
2772
+ "test_wer_de": 2770,
2773
+ "test_wer_el": 2771,
2774
+ "test_wer_en": 2772,
2775
+ "test_wer_es": 2773,
2776
+ "test_wer_et": 2774,
2777
+ "test_wer_fi": 2775,
2778
+ "test_wer_fr": 2776,
2779
+ "test_wer_hr": 2777,
2780
+ "test_wer_hu": 2778,
2781
+ "test_wer_it": 2779,
2782
+ "test_wer_lt": 2780,
2783
+ "test_wer_lv": 2781,
2784
+ "test_wer_mls": 2782,
2785
+ "test_wer_mt": 2783,
2786
+ "test_wer_nl": 2784,
2787
+ "test_wer_no_lm": 2785,
2788
+ "test_wer_non-streaming_greedy": 2786,
2789
+ "test_wer_on_common_voice_7": 2787,
2790
+ "test_wer_p&c": 2788,
2791
+ "test_wer_pl": 2789,
2792
+ "test_wer_pt": 2790,
2793
+ "test_wer_ro": 2791,
2794
+ "test_wer_ru": 2792,
2795
+ "test_wer_sk": 2793,
2796
+ "test_wer_sl": 2794,
2797
+ "test_wer_sv": 2795,
2798
+ "test_wer_uk": 2796,
2799
+ "test_wer_using_lm": 2797,
2800
+ "test_wer_with_language_model": 2798,
2801
+ "test_wer_with_lm": 2799,
2802
+ "test_wer_without_lm": 2800,
2803
+ "test_wil": 2801,
2804
+ "test_wip": 2802,
2805
+ "text-to-video_r@1": 2803,
2806
+ "text-to-video_r@10": 2804,
2807
+ "text_retrieval_r@1": 2805,
2808
+ "text_score": 2806,
2809
+ "thai_exam_acc": 2807,
2810
+ "think_step_length": 2808,
2811
+ "three_pixel_error": 2809,
2812
+ "threshold": 2810,
2813
+ "throughput_tps_on_h100": 2811,
2814
+ "tim_partial_f1": 2812,
2815
+ "tim_strict_f1": 2813,
2816
+ "time_mean": 2814,
2817
+ "time_ms": 2815,
2818
+ "time_std": 2816,
2819
+ "tm": 2817,
2820
+ "tn": 2818,
2821
+ "token-level_f1": 2819,
2822
+ "token-level_jaccard_similarity": 2820,
2823
+ "token_accuracy": 2821,
2824
+ "token_accuracy_all": 2822,
2825
+ "token_accuracy_ambiguous": 2823,
2826
+ "token_f1": 2824,
2827
+ "token_reduction_vs_character-level_%": 2825,
2828
+ "token_reduction_vs_v6.5_%": 2826,
2829
+ "token_scores_/_adresse_/_f1": 2827,
2830
+ "token_scores_/_adresse_/_precision": 2828,
2831
+ "token_scores_/_adresse_/_recall": 2829,
2832
+ "token_scores_/_adresse_/_redact": 2830,
2833
+ "token_scores_/_adresse_/_redact_full": 2831,
2834
+ "token_scores_/_date_/_f1": 2832,
2835
+ "token_scores_/_date_/_precision": 2833,
2836
+ "token_scores_/_date_/_recall": 2834,
2837
+ "token_scores_/_date_/_redact": 2835,
2838
+ "token_scores_/_date_/_redact_full": 2836,
2839
+ "token_scores_/_date_naissance_/_f1": 2837,
2840
+ "token_scores_/_date_naissance_/_precision": 2838,
2841
+ "token_scores_/_date_naissance_/_recall": 2839,
2842
+ "token_scores_/_date_naissance_/_redact": 2840,
2843
+ "token_scores_/_date_naissance_/_redact_full": 2841,
2844
+ "token_scores_/_disease_/_f1": 2842,
2845
+ "token_scores_/_disease_/_precision": 2843,
2846
+ "token_scores_/_disease_/_recall": 2844,
2847
+ "token_scores_/_ipp_/_f1": 2845,
2848
+ "token_scores_/_ipp_/_precision": 2846,
2849
+ "token_scores_/_ipp_/_recall": 2847,
2850
+ "token_scores_/_ipp_/_redact": 2848,
2851
+ "token_scores_/_ipp_/_redact_full": 2849,
2852
+ "token_scores_/_mail_/_f1": 2850,
2853
+ "token_scores_/_mail_/_precision": 2851,
2854
+ "token_scores_/_mail_/_recall": 2852,
2855
+ "token_scores_/_mail_/_redact": 2853,
2856
+ "token_scores_/_mail_/_redact_full": 2854,
2857
+ "token_scores_/_medication_/_f1": 2855,
2858
+ "token_scores_/_medication_/_precision": 2856,
2859
+ "token_scores_/_medication_/_recall": 2857,
2860
+ "token_scores_/_micro_/_f1": 2858,
2861
+ "token_scores_/_micro_/_precision": 2859,
2862
+ "token_scores_/_micro_/_recall": 2860,
2863
+ "token_scores_/_micro_/_redact": 2861,
2864
+ "token_scores_/_micro_/_redact_full": 2862,
2865
+ "token_scores_/_nda_/_f1": 2863,
2866
+ "token_scores_/_nda_/_precision": 2864,
2867
+ "token_scores_/_nda_/_recall": 2865,
2868
+ "token_scores_/_nda_/_redact": 2866,
2869
+ "token_scores_/_nda_/_redact_full": 2867,
2870
+ "token_scores_/_nom_/_f1": 2868,
2871
+ "token_scores_/_nom_/_precision": 2869,
2872
+ "token_scores_/_nom_/_recall": 2870,
2873
+ "token_scores_/_nom_/_redact": 2871,
2874
+ "token_scores_/_nom_/_redact_full": 2872,
2875
+ "token_scores_/_prenom_/_f1": 2873,
2876
+ "token_scores_/_prenom_/_precision": 2874,
2877
+ "token_scores_/_prenom_/_recall": 2875,
2878
+ "token_scores_/_prenom_/_redact": 2876,
2879
+ "token_scores_/_prenom_/_redact_full": 2877,
2880
+ "token_scores_/_procedure_/_f1": 2878,
2881
+ "token_scores_/_procedure_/_precision": 2879,
2882
+ "token_scores_/_procedure_/_recall": 2880,
2883
+ "token_scores_/_secu_/_f1": 2881,
2884
+ "token_scores_/_secu_/_precision": 2882,
2885
+ "token_scores_/_secu_/_recall": 2883,
2886
+ "token_scores_/_secu_/_redact": 2884,
2887
+ "token_scores_/_secu_/_redact_full": 2885,
2888
+ "token_scores_/_symptom_/_f1": 2886,
2889
+ "token_scores_/_symptom_/_precision": 2887,
2890
+ "token_scores_/_symptom_/_recall": 2888,
2891
+ "token_scores_/_tel_/_f1": 2889,
2892
+ "token_scores_/_tel_/_precision": 2890,
2893
+ "token_scores_/_tel_/_recall": 2891,
2894
+ "token_scores_/_tel_/_redact": 2892,
2895
+ "token_scores_/_tel_/_redact_full": 2893,
2896
+ "token_scores_/_ville_/_f1": 2894,
2897
+ "token_scores_/_ville_/_precision": 2895,
2898
+ "token_scores_/_ville_/_recall": 2896,
2899
+ "token_scores_/_ville_/_redact": 2897,
2900
+ "token_scores_/_ville_/_redact_full": 2898,
2901
+ "token_scores_/_zip_/_f1": 2899,
2902
+ "token_scores_/_zip_/_precision": 2900,
2903
+ "token_scores_/_zip_/_recall": 2901,
2904
+ "token_scores_/_zip_/_redact": 2902,
2905
+ "token_scores_/_zip_/_redact_full": 2903,
2906
+ "tokenized_f1": 2904,
2907
+ "tokens": 2905,
2908
+ "tokens/second": 2906,
2909
+ "tokens_per_character_compounds": 2907,
2910
+ "tokens_per_character_overall": 2908,
2911
+ "tokens_per_character_real_news": 2909,
2912
+ "tokens_per_second": 2910,
2913
+ "tokens_per_second_baseline_a100_fp16_512_tok": 2911,
2914
+ "tokens_per_second_ibce_a100_fp16_512_tok": 2912,
2915
+ "top-1": 2913,
2916
+ "top-1_%": 2914,
2917
+ "top-1_acc._%": 2915,
2918
+ "top-1_acc_%": 2916,
2919
+ "top-1_accuracy": 2917,
2920
+ "top-1_accuracy_%": 2918,
2921
+ "top-1_error_rate": 2919,
2922
+ "top-2_accuracy": 2920,
2923
+ "top-3-accuracy": 2921,
2924
+ "top-3_accuracy": 2922,
2925
+ "top-5_accuracy": 2923,
2926
+ "top1_acc": 2924,
2927
+ "top_1_accuracy": 2925,
2928
+ "top_1_accuracy_dynamic_quantized_wi8_afp32": 2926,
2929
+ "top_1_accuracy_full_precision": 2927,
2930
+ "top_5_accuracy": 2928,
2931
+ "top_5_accuracy_dynamic_quantized_wi8_afp32": 2929,
2932
+ "top_5_accuracy_full_precision": 2930,
2933
+ "total": 2931,
2934
+ "total_column_score": 2932,
2935
+ "total_cores": 2933,
2936
+ "total_flops": 2934,
2937
+ "total_model_size_gb": 2935,
2938
+ "total_precision": 2936,
2939
+ "total_recall": 2937,
2940
+ "total_reward_mean": 2938,
2941
+ "total_steps": 2939,
2942
+ "total_time_in_seconds": 2940,
2943
+ "total_timesteps": 2941,
2944
+ "toxicity_rito": 2942,
2945
+ "tp": 2943,
2946
+ "traffic_vocabulary_coverage": 2944,
2947
+ "train_accuracy": 2945,
2948
+ "train_loss": 2946,
2949
+ "train_mse": 2947,
2950
+ "train_runtime_seconds": 2948,
2951
+ "training_accuracy": 2949,
2952
+ "training_done": 2950,
2953
+ "training_flops": 2951,
2954
+ "training_loss": 2952,
2955
+ "training_loss_final": 2953,
2956
+ "training_pearson_cosine": 2954,
2957
+ "training_progress_%": 2955,
2958
+ "training_steps": 2956,
2959
+ "transcription_accuracy": 2957,
2960
+ "translation_bleu_score": 2958,
2961
+ "transliteration_-_character_accuracy": 2959,
2962
+ "transliteration_-_exact_match_accuracy": 2960,
2963
+ "treatment_f1-score": 2961,
2964
+ "treatment_precision": 2962,
2965
+ "treatment_recall": 2963,
2966
+ "trg2src_accuracy": 2964,
2967
+ "triplet_accuracy": 2965,
2968
+ "true_accuracy": 2966,
2969
+ "trueskill": 2967,
2970
+ "truthfulqa": 2968,
2971
+ "truthfulqa_0-shot": 2969,
2972
+ "trv_tegu_->_zho_hant_zh": 2970,
2973
+ "trv_truk_->_zho_hant_zh": 2971,
2974
+ "tube-boundary_ap": 2972,
2975
+ "ud_jaccard": 2973,
2976
+ "unfair-tos": 2974,
2977
+ "unique_preference_rate": 2975,
2978
+ "unlabeled_attachment_score_uas": 2976,
2979
+ "unlabeled_sentiment_tuple_f1": 2977,
2980
+ "unlabelled_attachment_score": 2978,
2981
+ "unproven_accuracy": 2979,
2982
+ "unweighted_accuracy_ua": 2980,
2983
+ "upos": 2981,
2984
+ "upos_accuracy": 2982,
2985
+ "v-measure": 2983,
2986
+ "v-measure_main": 2984,
2987
+ "v-measure_sub": 2985,
2988
+ "v_measure": 2986,
2989
+ "v_measure_std": 2987,
2990
+ "val": 2988,
2991
+ "val_acc": 2989,
2992
+ "val_f1_score": 2990,
2993
+ "val_miou": 2991,
2994
+ "val_pass@1": 2992,
2995
+ "val_per": 2993,
2996
+ "val_per_on_common_voice_fr_13.0_|_trained": 2994,
2997
+ "val_per_on_multilingual_librispeech_fr_|_trained": 2995,
2998
+ "validation_accuracy": 2996,
2999
+ "validation_accuracy_on_coscan_speech": 2997,
3000
+ "validation_accuracy_subset_experiment": 2998,
3001
+ "validation_bleu": 2999,
3002
+ "validation_cer": 3000,
3003
+ "validation_cer_with_5-gram_lm": 3001,
3004
+ "validation_cross-entropy": 3002,
3005
+ "validation_dev_overall": 3003,
3006
+ "validation_f1": 3004,
3007
+ "validation_f1_micro_on_coscan_speech": 3005,
3008
+ "validation_loss": 3006,
3009
+ "validation_loss_best": 3007,
3010
+ "validation_loss_final": 3008,
3011
+ "validation_loss_subset_experiment": 3009,
3012
+ "validation_macro_f1": 3010,
3013
+ "validation_mae": 3011,
3014
+ "validation_matched_accuracy": 3012,
3015
+ "validation_matched_f1": 3013,
3016
+ "validation_miou": 3014,
3017
+ "validation_mismatched_accuracy": 3015,
3018
+ "validation_mismatched_f1": 3016,
3019
+ "validation_nli_cosine_accuracy": 3017,
3020
+ "validation_nli_cosine_accuracy_threshold": 3018,
3021
+ "validation_nli_cosine_ap": 3019,
3022
+ "validation_nli_cosine_f1": 3020,
3023
+ "validation_nli_cosine_f1_threshold": 3021,
3024
+ "validation_nli_cosine_mcc": 3022,
3025
+ "validation_nli_cosine_precision": 3023,
3026
+ "validation_nli_cosine_recall": 3024,
3027
+ "validation_perplexity": 3025,
3028
+ "validation_perplexity_approx.": 3026,
3029
+ "validation_r^2": 3027,
3030
+ "validation_rmse_best_run_internal_autogluon_validation": 3028,
3031
+ "validation_rmsle": 3029,
3032
+ "validation_rogue-1": 3030,
3033
+ "validation_rogue-1.": 3031,
3034
+ "validation_rogue-2": 3032,
3035
+ "validation_rogue-l": 3033,
3036
+ "validation_rogue-l-sum": 3034,
3037
+ "validation_rogue-lsum": 3035,
3038
+ "validation_rouge-1": 3036,
3039
+ "validation_rouge-2": 3037,
3040
+ "validation_rouge-l": 3038,
3041
+ "validation_rouge-l_sum": 3039,
3042
+ "validation_rte_cosine_accuracy": 3040,
3043
+ "validation_rte_cosine_accuracy_threshold": 3041,
3044
+ "validation_rte_cosine_ap": 3042,
3045
+ "validation_rte_cosine_f1": 3043,
3046
+ "validation_rte_cosine_f1_threshold": 3044,
3047
+ "validation_rte_cosine_mcc": 3045,
3048
+ "validation_rte_cosine_precision": 3046,
3049
+ "validation_rte_cosine_recall": 3047,
3050
+ "validation_sts_pearson_cosine": 3048,
3051
+ "validation_sts_spearman_cosine": 3049,
3052
+ "validation_wer": 3050,
3053
+ "validation_wer_with_5-gram_lm": 3051,
3054
+ "variant_aggregation": 3052,
3055
+ "vdcscore": 3053,
3056
+ "vdd": 3054,
3057
+ "viewpoint_i_aepe": 3055,
3058
+ "vocab_size": 3056,
3059
+ "vocabulary_size": 3057,
3060
+ "voxceleb_dev": 3058,
3061
+ "vqa_ablation": 3059,
3062
+ "vram_reduction_%": 3060,
3063
+ "vs_base_model": 3061,
3064
+ "vtab": 3062,
3065
+ "v\u2011measure_main/sub": 3063,
3066
+ "wacc": 3064,
3067
+ "wb": 3065,
3068
+ "wbscore": 3066,
3069
+ "weed_precision": 3067,
3070
+ "weighted-f1": 3068,
3071
+ "weighted-f1_score": 3069,
3072
+ "weighted_accuarcy": 3070,
3073
+ "weighted_accuracy": 3071,
3074
+ "weighted_average_f1-score": 3072,
3075
+ "weighted_f1": 3073,
3076
+ "weighted_f1-score": 3074,
3077
+ "weighted_f1-score_logistic_regression": 3075,
3078
+ "weighted_f1-score_svc": 3076,
3079
+ "weighted_f1_score": 3077,
3080
+ "weighted_precision": 3078,
3081
+ "weighted_precision_svc": 3079,
3082
+ "weighted_recall": 3080,
3083
+ "weighted_recall_svc": 3081,
3084
+ "well-structured_stories": 3082,
3085
+ "wer": 3083,
3086
+ "wer%": 3084,
3087
+ "wer_%": 3085,
3088
+ "wer_1.12s_frame_size": 3086,
3089
+ "wer_beam_5": 3087,
3090
+ "wer_catalan": 3088,
3091
+ "wer_documentaries": 3089,
3092
+ "wer_english_-_combined": 3090,
3093
+ "wer_for_arabic": 3091,
3094
+ "wer_greedy": 3092,
3095
+ "wer_indonesian_-_combined": 3093,
3096
+ "wer_lm": 3094,
3097
+ "wer_news": 3095,
3098
+ "wer_normalized": 3096,
3099
+ "wer_on_common_voice_17.0": 3097,
3100
+ "wer_orthographic": 3098,
3101
+ "wer_raw": 3099,
3102
+ "wer_reference_column:_raw_transcription": 3100,
3103
+ "wer_reference_column:_transcription": 3101,
3104
+ "wer_seed_42_-_split_1": 3102,
3105
+ "wer_seed_42_-_split_2": 3103,
3106
+ "wer_seed_42_-_split_3": 3104,
3107
+ "wer_seed_43_-_split_1": 3105,
3108
+ "wer_seed_43_-_split_2": 3106,
3109
+ "wer_seed_43_-_split_3": 3107,
3110
+ "wer_seed_44_-_split_1": 3108,
3111
+ "wer_seed_44_-_split_2": 3109,
3112
+ "wer_seed_44_-_split_3": 3110,
3113
+ "wer_spanish": 3111,
3114
+ "wer_test": 3112,
3115
+ "wer_unnormalized": 3113,
3116
+ "wer_validation": 3114,
3117
+ "wer_with_punctuation_and_capital_letters": 3115,
3118
+ "wer_without_normalization": 3116,
3119
+ "wer_without_punctuation": 3117,
3120
+ "wer_word_error_rate": 3118,
3121
+ "wiki_split": 3119,
3122
+ "wil": 3120,
3123
+ "wildguard_total_f1": 3121,
3124
+ "willingness_to_answer": 3122,
3125
+ "win-rate": 3123,
3126
+ "win_rate": 3124,
3127
+ "win_rate_%": 3125,
3128
+ "win_rate_vs_base_model_llm-as-judge": 3126,
3129
+ "win_rate_vs_baseline_claude_3.5_sonnet_blind_a/b_n=42": 3127,
3130
+ "win_rate_vs_baseline_claude_haiku_4.5_blind_a/b_n=15": 3128,
3131
+ "win_rate_vs_baseline_claude_haiku_4.5_blind_a/b_n=57": 3129,
3132
+ "win_rate_vs_baseline_claude_opus_4_blind_a/b_n=15": 3130,
3133
+ "win_rate_vs_baseline_claude_opus_4_blind_a/b_n=57": 3131,
3134
+ "win_rate_vs_baseline_claude_sonnet_4_blind_a/b_n=42": 3132,
3135
+ "win_rate_vs_baseline_gemini_2.5_flash_lite_blind_a/b_n=57": 3133,
3136
+ "win_rate_vs_baseline_gpt-4o_blind_a/b_n=57": 3134,
3137
+ "win_rate_vs_baseline_overall_claude_judges_blind_a/b_n=57": 3135,
3138
+ "winogrande": 3136,
3139
+ "winogrande_0-shot": 3137,
3140
+ "winogrande_5-shot": 3138,
3141
+ "winogrande_rc": 3139,
3142
+ "wip": 3140,
3143
+ "word-count_constraint_accuracy_120-150": 3141,
3144
+ "word_accuracy": 3142,
3145
+ "word_accuracy_oov": 3143,
3146
+ "word_error_rate": 3144,
3147
+ "word_error_rate_all_data": 3145,
3148
+ "word_error_rate_eslo": 3146,
3149
+ "word_error_rate_langage": 3147,
3150
+ "word_error_rate_wer": 3148,
3151
+ "word_error_rate_wer_%": 3149,
3152
+ "word_error_rate_with_limited_vocabulary": 3150,
3153
+ "worst_group_accuracy": 3151,
3154
+ "writing": 3152,
3155
+ "xpos_accuracy": 3153,
3156
+ "xstest_f1": 3154,
3157
+ "yes/no_accuracy": 3155,
3158
+ "zero-shot_accuracy": 3156,
3159
+ "zero-shot_clip_accuracy": 3157,
3160
+ "zero-shot_precision": 3158,
3161
+ "zero-shot_recall": 3159,
3162
+ "zero-shot_top-1_acc._%": 3160,
3163
+ "zero-shot_top-1_acc_%": 3161,
3164
+ "zero-shot_transfer": 3162,
3165
+ "zeroth-test-bleu": 3163,
3166
+ "zeroth-test-cer": 3164,
3167
+ "zeroth-test-wer": 3165,
3168
+ "zho_hant_->_ami_xiug_13a": 3166,
3169
+ "zho_hant_->_trv_tegu_13a": 3167,
3170
+ "zho_hant_->_trv_truk_13a": 3168,
3171
+ "\u03c00": 3169,
3172
+ "\u0627\u062d\u0633\u0627\u0646_compliance": 3170,
3173
+ "\u226490%ile": 3171
3174
+ }
data/task2id.json ADDED
@@ -0,0 +1,2553 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "0-shot": 0,
3
+ "0-shot CoT": 1,
4
+ "0-shot, CoT": 2,
5
+ "1-shot": 3,
6
+ "10-shot": 4,
7
+ "2-shot": 5,
8
+ "2-shot, CoT": 6,
9
+ "25-shot": 7,
10
+ "2D Human Pose Estimation": 8,
11
+ "2D Object Detection": 9,
12
+ "2D Pose Estimation": 10,
13
+ "2D image classification": 11,
14
+ "2R. Avg.": 12,
15
+ "3-5-shot": 13,
16
+ "3-shot": 14,
17
+ "3-shot, CoT": 15,
18
+ "3D Face Reconstruction": 16,
19
+ "3D Human Pose Estimation": 17,
20
+ "3D Instance Segmentation": 18,
21
+ "3D Multi-Object Tracking": 19,
22
+ "3D Object Captioning": 20,
23
+ "3D Object Classification": 21,
24
+ "3D Object Detection": 22,
25
+ "3D Open-Vocabulary Instance Segmentation": 23,
26
+ "3D Point Cloud Classification": 24,
27
+ "3D Pose Estimation": 25,
28
+ "3D Reconstruction": 26,
29
+ "3D Semantic Scene Completion": 27,
30
+ "3D Semantic Segmentation": 28,
31
+ "3D Shape Reconstruction": 29,
32
+ "3D radiology image classification": 30,
33
+ "3DSR": 31,
34
+ "4-Class: (Benign, Defacement, Phishing, Malware)": 32,
35
+ "4-shot": 33,
36
+ "4-shot, maj@4": 34,
37
+ "4D Panoptic Segmentation": 35,
38
+ "5-shot": 36,
39
+ "5-shot, CoT": 37,
40
+ "6D Pose Estimation": 38,
41
+ "7-shot": 39,
42
+ "8-shot": 40,
43
+ "8-shot, CoT": 41,
44
+ "8-shot, maj@8": 42,
45
+ "AGIEval": 43,
46
+ "AI Text Detection": 44,
47
+ "AI-Generated Text Detection": 45,
48
+ "AI2 ARC (Challenge)": 46,
49
+ "AI2 ARC (Easy)": 47,
50
+ "ARC": 48,
51
+ "ARC Challenge": 49,
52
+ "ARC Prize 2025 (legacy evaluation mapping)": 50,
53
+ "ARC-Challenge": 51,
54
+ "ARC-Easy": 52,
55
+ "ARC_C": 53,
56
+ "ARC_E": 54,
57
+ "ASR": 55,
58
+ "AST (0-shot, English-Korean)": 56,
59
+ "Abstract Algebra": 57,
60
+ "Abstract reasoning challenge": 58,
61
+ "Abstractive Dialogue Summarization": 59,
62
+ "Abstractive Question Answering": 60,
63
+ "Abstractive Summarization": 61,
64
+ "Abstractive Text Summarization": 62,
65
+ "Accented Speech Recognition": 63,
66
+ "Acoustic Scene Classification": 64,
67
+ "Action Detection": 65,
68
+ "Action Recognition": 66,
69
+ "Action Recognition In Videos": 67,
70
+ "Action Segmentation": 68,
71
+ "Ad-Hoc Information Retrieval": 69,
72
+ "Adversarial NLI": 70,
73
+ "Adversarial Robustness": 71,
74
+ "Agentic": 72,
75
+ "Alignment": 73,
76
+ "Alignment Faking Detection": 74,
77
+ "All-in-One Image Restoration": 75,
78
+ "Amazon Review Classification": 76,
79
+ "AmazonCounterfactualClassification": 77,
80
+ "AmazonReviewsClassification": 78,
81
+ "American Invitational Mathematics Examination": 79,
82
+ "Analogy Questions (BATS)": 80,
83
+ "Analogy Questions (ConceptNet Analogy)": 81,
84
+ "Analogy Questions (Google)": 82,
85
+ "Analogy Questions (NELL-ONE Analogy)": 83,
86
+ "Analogy Questions (SAT full)": 84,
87
+ "Analogy Questions (SAT)": 85,
88
+ "Analogy Questions (TREX Analogy)": 86,
89
+ "Analogy Questions (U2)": 87,
90
+ "Analogy Questions (U4)": 88,
91
+ "Animal Pose Estimation": 89,
92
+ "Anomaly Detection": 90,
93
+ "Arabic AI Text Detection": 91,
94
+ "Arabic to English Translation": 92,
95
+ "Argument Mining": 93,
96
+ "Arithmetic Reasoning": 94,
97
+ "ArxivQA": 95,
98
+ "Aspect-Based Sentiment Analysis (ABSA)": 96,
99
+ "Atari Games": 97,
100
+ "Atomic action recognition": 98,
101
+ "Attacks on Democratic Basic Order Detection": 99,
102
+ "Audio Classification": 100,
103
+ "Audio Emotion Classification": 101,
104
+ "Audio Emotion Recognition": 102,
105
+ "Audio Generation": 103,
106
+ "Audio Retrieval": 104,
107
+ "Audio Source Separation": 105,
108
+ "Audio Super-Resolution": 106,
109
+ "Audio Tagging": 107,
110
+ "Audio captioning": 108,
111
+ "Authorship Verification": 109,
112
+ "Auto Debugging": 110,
113
+ "Automated Theorem Proving": 111,
114
+ "Automatic Phoneme Recognition": 112,
115
+ "Automatic Speech Recognition": 113,
116
+ "Average": 114,
117
+ "BBH": 115,
118
+ "BLEU": 116,
119
+ "Bandwidth Extension": 117,
120
+ "Battery Insertion": 118,
121
+ "Beta-secretase Inhibition": 119,
122
+ "Bias Detection": 120,
123
+ "Biblical Hebrew Vocalization": 121,
124
+ "Binary Classification": 122,
125
+ "Binary Image Classification": 123,
126
+ "Binary OHCA detection (OHCA vs non-OHCA)": 124,
127
+ "Binary Propaganda Detection": 125,
128
+ "Binary Text Classification (Autoimmune Neurology)": 126,
129
+ "Binary text classification": 127,
130
+ "Binary: (Legit vs Spam Email)": 128,
131
+ "Biomedical Information Retrieval": 129,
132
+ "Biomedical QA (Chinese)": 130,
133
+ "Biomedical QA (PubMedQA)": 131,
134
+ "BitextMining": 132,
135
+ "Blind Face Restoration": 133,
136
+ "Blind Reconstruction (2-pass)": 134,
137
+ "Blood-Brain Barrier": 135,
138
+ "BoolQ": 136,
139
+ "BoolQ Question Answering": 137,
140
+ "Brain Tumor Classification": 138,
141
+ "Brain Tumor Detection": 139,
142
+ "Breast Cancer Histology Image Classification": 140,
143
+ "Breast Tumour Classification": 141,
144
+ "Bug-fix Patch Generation": 142,
145
+ "Business Intelligence Engine": 143,
146
+ "C-Eval (valid)": 144,
147
+ "COVID-19 Diagnosis": 145,
148
+ "CSQA": 146,
149
+ "CV-Bench": 147,
150
+ "Call to Action Detection": 148,
151
+ "Camera Pose Estimation": 149,
152
+ "Camouflaged Object Segmentation": 150,
153
+ "Cancer Image Classification": 151,
154
+ "Car Damage Detection": 152,
155
+ "CartPole-v1": 153,
156
+ "Caselaw Retrieval": 154,
157
+ "CatalanQA": 155,
158
+ "Categorical Classification (CC)": 156,
159
+ "Categorical Pair Similarity (CPS)": 157,
160
+ "Category Clustering": 158,
161
+ "Causal Language Modeling": 159,
162
+ "Cell Type Prediction": 160,
163
+ "Character Plot Arc Classification": 161,
164
+ "Chart Question Answering": 162,
165
+ "Chart reasoning": 163,
166
+ "Chat": 164,
167
+ "Chat & Instruction Following": 165,
168
+ "Cheese Texture Classification": 166,
169
+ "Chest X-ray report generation": 167,
170
+ "Chinese": 168,
171
+ "Citation Classification": 169,
172
+ "Claim Checkworthiness Detection": 170,
173
+ "Clasificación de reseñas (5 clases)": 171,
174
+ "Clasificación de texto": 172,
175
+ "Class-Specific Performance": 173,
176
+ "Classification": 174,
177
+ "Classification (ROC AUC)": 175,
178
+ "Classification Tasks": 176,
179
+ "Classify an image of chart to one of the following types: line, scatter, dot, vertical_bar, or horizontal_bar.": 177,
180
+ "Clickbait Detection": 178,
181
+ "Climate NLP Tasks (ClimaBench)": 179,
182
+ "Climate logical fallacy classification": 180,
183
+ "Clinical NER": 181,
184
+ "Clinical Note Embeddings": 182,
185
+ "Clinical Operations": 183,
186
+ "Clinical Support": 184,
187
+ "Clinical Text Embeddings": 185,
188
+ "Clinical Trial Comprehension": 186,
189
+ "Clustering": 187,
190
+ "CoQA": 188,
191
+ "Code": 189,
192
+ "Code Completion": 190,
193
+ "Code Documentation Generation": 191,
194
+ "Code Generation": 192,
195
+ "Code Reranking": 193,
196
+ "Code Retrieval": 194,
197
+ "Code Search": 195,
198
+ "Code generation": 196,
199
+ "Code generation and completion": 197,
200
+ "Coding": 198,
201
+ "Coherence-Momentum": 199,
202
+ "Col BERTTriplet": 200,
203
+ "Colorectal Gland Segmentation:": 201,
204
+ "Common Sense": 202,
205
+ "Common Sense Reasoning": 203,
206
+ "Commonsense": 204,
207
+ "Commonsense Reasoning": 205,
208
+ "Commonsense Understanding": 206,
209
+ "Commonsense natural language inference": 207,
210
+ "Conditional Generation": 208,
211
+ "Conditional Image Generation": 209,
212
+ "Confidence (Low/Medium/High)": 210,
213
+ "Contemporary-lb": 211,
214
+ "Contract clause classification": 212,
215
+ "Contracts Retrieval": 213,
216
+ "Contrastive Learning": 214,
217
+ "Conversation Summarization": 215,
218
+ "Conversational": 216,
219
+ "Conversational Response Retrieval": 217,
220
+ "Conversational Web Navigation": 218,
221
+ "Conversational and Function Calling": 219,
222
+ "Core Reasoning Tasks": 220,
223
+ "Coreference Resolution": 221,
224
+ "Coreference resolution": 222,
225
+ "Cough Classification": 223,
226
+ "Crisis Detection": 224,
227
+ "Crop Classification": 225,
228
+ "Crop Recommendation": 226,
229
+ "Cross Encoder Binary Classification": 227,
230
+ "Cross Encoder Classification": 228,
231
+ "Cross Encoder Correlation": 229,
232
+ "Cross Encoder Nano BEIR": 230,
233
+ "Cross Encoder Reranking": 231,
234
+ "Cross Encoder Softmax Accuracy": 232,
235
+ "Cross-Lingual Document Retrieval": 233,
236
+ "Cross-Lingual Transfer": 234,
237
+ "Cross-Modal Retrieval": 235,
238
+ "Cuisine (20 classes)": 236,
239
+ "Cultural Vocal Bursts Intensity Prediction": 237,
240
+ "Curated Test Samples": 238,
241
+ "Curiosity-driven Exploration": 239,
242
+ "Custom Information Retrieval": 240,
243
+ "Custom Triplet": 241,
244
+ "Customer Support Response Generation": 242,
245
+ "Cyberbullying Moderation (label + type)": 243,
246
+ "Cytotoxicity Prediction from Molecular Structure": 244,
247
+ "Cytotoxicity Prediction from Promiscuity": 245,
248
+ "DROP": 246,
249
+ "Danish EURLEX (Level 2)": 247,
250
+ "Data Augmentation": 248,
251
+ "Data-to-Text Generation": 249,
252
+ "Deblurring": 250,
253
+ "DeepFake Detection": 251,
254
+ "Deepfake Detection": 252,
255
+ "Definition Retrieval": 253,
256
+ "Dense Pixel Correspondence Estimation": 254,
257
+ "Dependency Parsing": 255,
258
+ "Description-guided molecule generation": 256,
259
+ "Detection Tasks": 257,
260
+ "DevOps Question Answering": 258,
261
+ "Device Aware Information Retrieval": 259,
262
+ "Dialog Navigation": 260,
263
+ "Discourse Parsing": 261,
264
+ "Disease Progression Classification (Longitudinal)": 262,
265
+ "DocVQA": 263,
266
+ "Document Classification": 264,
267
+ "Document Intelligence": 265,
268
+ "Document Layout Analysis": 266,
269
+ "Document Ranking": 267,
270
+ "Document Reranking": 268,
271
+ "Document Retrieval": 269,
272
+ "Document Summarization": 270,
273
+ "Document inconsistency detection (NLI-like)": 271,
274
+ "Document-Grounded QA": 272,
275
+ "Domain Adaptation": 273,
276
+ "Domain Generalization": 274,
277
+ "Domain Q&A": 275,
278
+ "Drilling Engineering AI": 276,
279
+ "Drug - Drug Interaction Classification": 277,
280
+ "Drug Discovery": 278,
281
+ "Drug-ADR Relation Extraction": 279,
282
+ "Dynamic Reconstruction": 280,
283
+ "ECG Report Generation": 281,
284
+ "Eastern Syriac Vocalization": 282,
285
+ "Educational Outcome Prediction": 283,
286
+ "Efficiency vs Baseline": 284,
287
+ "EgoSchema": 285,
288
+ "Email Classification": 286,
289
+ "Email Summarization": 287,
290
+ "Email Ticket Classification": 288,
291
+ "Embedding Synthesis over Long Context": 289,
292
+ "Emotion Analysis (Regression)": 290,
293
+ "Emotion Classification": 291,
294
+ "Emotion Classification in Czech": 292,
295
+ "Emotion Classification in German": 293,
296
+ "Emotion Classification in Hungarian": 294,
297
+ "Emotion Classification in Polish": 295,
298
+ "Emotion Classification in Slovak": 296,
299
+ "Emotion Classifier": 297,
300
+ "Emotion Detection": 298,
301
+ "Emotion Interpretation": 299,
302
+ "Emotion Recognition": 300,
303
+ "Emotion-Entailment": 301,
304
+ "Emotional Intelligence": 302,
305
+ "End-of-Turn Detection": 303,
306
+ "Energy Document Classification": 304,
307
+ "English": 305,
308
+ "English Document Retrieval": 306,
309
+ "English to Colloquial Tamil": 307,
310
+ "English to Marathi Translation": 308,
311
+ "English → Romanian": 309,
312
+ "English-Thai Translation Quality Assessment": 310,
313
+ "English-Thai Translation Quality Comparison": 311,
314
+ "English-Ukrainian Translation": 312,
315
+ "Entity Disambiguation": 313,
316
+ "Entity Linking": 314,
317
+ "Entity Resolution": 315,
318
+ "Entrepreneurial Readiness (low/medium/high)": 316,
319
+ "Event-based Object Segmentation": 317,
320
+ "Expert Routing": 318,
321
+ "Explanation Generation": 319,
322
+ "Extractive Question Answering": 320,
323
+ "Extractive Question-Answering": 321,
324
+ "Extractive Text Summarization": 322,
325
+ "Extreme Summarization": 323,
326
+ "Ezafe Detection": 324,
327
+ "F-16 longitudinal alpha tracking": 325,
328
+ "FLUE": 326,
329
+ "FQuAD": 327,
330
+ "Face Anti-Spoofing": 328,
331
+ "Face Detection": 329,
332
+ "Face Recognition": 330,
333
+ "Face Verification": 331,
334
+ "Facial Emotion Classification": 332,
335
+ "Facial Stress Level Prediction": 333,
336
+ "Fact Checking": 334,
337
+ "Fact Verification": 335,
338
+ "Factual Inconsistency Detection in Chart Captioning": 336,
339
+ "Factual accuracy": 337,
340
+ "Faithfulness Critic": 338,
341
+ "Fake News Detection": 339,
342
+ "Fake news classification (binary)": 340,
343
+ "Fallacy Detection": 341,
344
+ "Fashion Visual Search": 342,
345
+ "Feature Extraction": 343,
346
+ "Feedback Classification": 344,
347
+ "Few-Shot Image Classification": 345,
348
+ "Few-Shot Object Detection": 346,
349
+ "Few-Shot Semantic Segmentation": 347,
350
+ "Few-Shot Text Classification": 348,
351
+ "Fewshot Translation": 349,
352
+ "Fiction vs Non-Fiction Classification": 350,
353
+ "Field Classification": 351,
354
+ "Fill Mask": 352,
355
+ "Fill mask": 353,
356
+ "Fill-Mask": 354,
357
+ "Financial Advisory Generation": 355,
358
+ "Financial Compliance": 356,
359
+ "Financial Sentiment Analysis": 357,
360
+ "Financial Transaction Classification": 358,
361
+ "Financial Tweet Prediction": 359,
362
+ "Fine-Grained Image Classification": 360,
363
+ "Formal Logic": 361,
364
+ "Full Reconstruction (100%)": 362,
365
+ "Function Calling": 363,
366
+ "GPU Kernel Generation": 364,
367
+ "GSM8K": 365,
368
+ "GSM8K-Style Problems": 366,
369
+ "GSM8k": 367,
370
+ "GSM8k Mathematical Reasoning": 368,
371
+ "Gender Classification": 369,
372
+ "General": 370,
373
+ "General Domains": 371,
374
+ "General Knowledge": 372,
375
+ "General Multimodal": 373,
376
+ "General QA": 374,
377
+ "General Reasoning": 375,
378
+ "General Writing": 376,
379
+ "Generation Tasks": 377,
380
+ "Generative 3D Object Classification": 378,
381
+ "Generative Visual Question Answering": 379,
382
+ "GermanSTSBenchmark": 380,
383
+ "Gibberish Detection": 381,
384
+ "Global-MMLU-Lite": 382,
385
+ "Graded IR": 383,
386
+ "Grammar Classification": 384,
387
+ "Grammatical Error Correction": 385,
388
+ "Graph Classification": 386,
389
+ "Graph Property Prediction": 387,
390
+ "Graph Regression": 388,
391
+ "HLE Math": 389,
392
+ "HSwag": 390,
393
+ "Hallucination Detection": 391,
394
+ "Handwritten Text Recognition": 392,
395
+ "Hanoi Tower Puzzle": 393,
396
+ "Hanoi Tower Puzzle (Subtask-based)": 394,
397
+ "Hate / Not Hate classification": 395,
398
+ "Hate Speech Detection": 396,
399
+ "Hate Speech Span Detection": 397,
400
+ "Hate speech classification": 398,
401
+ "Head Pose Recognition (Facing)": 399,
402
+ "Head Pose Recognition (Tilt)": 400,
403
+ "Head Pose Recognition (Up/Down)": 401,
404
+ "Health Coaching": 402,
405
+ "Health-Aware Recipe Generation": 403,
406
+ "HellaSwag": 404,
407
+ "Hellaswag Contextual Completions": 405,
408
+ "High School Computer Science": 406,
409
+ "High School Mathematics": 407,
410
+ "Histopathologic Cancer Detection": 408,
411
+ "Historic Text Normalization (type-level)": 409,
412
+ "HourVideo": 410,
413
+ "Human Instance Segmentation": 411,
414
+ "Human vs AI Text Classification": 412,
415
+ "Human vs AI Text Detection": 413,
416
+ "HumanEval": 414,
417
+ "Humor Detection": 415,
418
+ "IF": 416,
419
+ "IaC Generation": 417,
420
+ "Idea Difficulty (Low/Medium/High)": 418,
421
+ "Image Captioning": 419,
422
+ "Image Classification": 420,
423
+ "Image Clustering": 421,
424
+ "Image Deblurring": 422,
425
+ "Image Dehazing": 423,
426
+ "Image Description": 424,
427
+ "Image Document Retrieval": 425,
428
+ "Image Generation": 426,
429
+ "Image Inpainting": 427,
430
+ "Image Manipulation Detection": 428,
431
+ "Image Manipulation Localization": 429,
432
+ "Image Matching": 430,
433
+ "Image Matting": 431,
434
+ "Image Outpainting": 432,
435
+ "Image Reconstruction": 433,
436
+ "Image Registration": 434,
437
+ "Image Restoration": 435,
438
+ "Image Retrieval": 436,
439
+ "Image Segmentation": 437,
440
+ "Image Super-Resolution": 438,
441
+ "Image To Text": 439,
442
+ "Image-Classification": 440,
443
+ "Image-to-Image Translation": 441,
444
+ "Image-to-Text Retrieval": 442,
445
+ "ImageClassification": 443,
446
+ "Imitation Policy Evaluation": 444,
447
+ "In-Context Reinforcement Learning": 445,
448
+ "Incremental Learning": 446,
449
+ "Indic-NLI": 447,
450
+ "Indic-Paraphrase": 448,
451
+ "Indic-QA Evaluation": 449,
452
+ "Indic-Sentiment Analysis": 450,
453
+ "Industrial Quality Control": 451,
454
+ "InfoVQA": 452,
455
+ "Information Retrieval": 453,
456
+ "Instance Segmentation": 454,
457
+ "Instruct": 455,
458
+ "Instruction Following": 456,
459
+ "Instruction following": 457,
460
+ "InstructionRetrieval": 458,
461
+ "Instrument Recognition": 459,
462
+ "Intent Classification": 460,
463
+ "Interactive Segmentation": 461,
464
+ "Irony Detection": 462,
465
+ "JPEG Decompression": 463,
466
+ "JPRDY": 464,
467
+ "KG-to-Text Generation": 465,
468
+ "KLUE-STS": 466,
469
+ "KLUE-TC": 467,
470
+ "KSM": 468,
471
+ "Key Information Extraction": 469,
472
+ "Keyphrase Extraction": 470,
473
+ "Keyword Extraction": 471,
474
+ "Keyword Spotting": 472,
475
+ "Knowledge": 473,
476
+ "Knowledge & QA": 474,
477
+ "Knowledge Benchmarking": 475,
478
+ "Knowledge Distillation": 476,
479
+ "Knowledge Graphs": 477,
480
+ "Ko-StrategyQA": 478,
481
+ "KorSTS": 479,
482
+ "LABELED_DEPENDENCIES": 480,
483
+ "LBHistoricalBitextMining": 481,
484
+ "LEMMA": 482,
485
+ "LSR": 483,
486
+ "Lane Detection": 484,
487
+ "Language Identification": 485,
488
+ "Language Modeling": 486,
489
+ "Language Modelling": 487,
490
+ "Language Sentiment Analysis": 488,
491
+ "Language Understanding": 489,
492
+ "Large Language Model": 490,
493
+ "Latent Diffusion Model for 3D": 491,
494
+ "Latent Diffusion Model for 3D - Pano": 492,
495
+ "Latent Diffusion Model for 3D - Super-Resolution": 493,
496
+ "Latent Diffusion Model for 3D-4C": 494,
497
+ "Legal Case Analysis": 495,
498
+ "Legal Document Retrieval": 496,
499
+ "Legal Document Summarization": 497,
500
+ "Legal Q&A (PT-PT)": 498,
501
+ "Lemmatisation": 499,
502
+ "Lexical Relation Classification (BLESS)": 500,
503
+ "Lexical Relation Classification (CogALexV)": 501,
504
+ "Lexical Relation Classification (EVALution)": 502,
505
+ "Lexical Relation Classification (K&H+N)": 503,
506
+ "Lexical Relation Classification (ROOT09)": 504,
507
+ "Lexical bias detection": 505,
508
+ "Linguistic Acceptability": 506,
509
+ "Linguistic Accuracy Evaluation": 507,
510
+ "Link Prediction": 508,
511
+ "Literary Explicitness Classification": 509,
512
+ "Logging": 510,
513
+ "Logical Reasoning": 511,
514
+ "Long Context": 512,
515
+ "Long Video Retrieval (Background Removed)": 513,
516
+ "Long context": 514,
517
+ "Long, Legal Document Summarization": 515,
518
+ "Long-Context Hallucination Detection": 516,
519
+ "Long-Context Understanding": 517,
520
+ "Long-horizon": 518,
521
+ "Long-tail Learning": 519,
522
+ "LongVideoBench": 520,
523
+ "Lung Nodule Detection": 521,
524
+ "MATH": 522,
525
+ "MBTI Personality Classification": 523,
526
+ "MC2, 10-shot": 524,
527
+ "MIRACL-Reranking": 525,
528
+ "MIRACL-Retrieval": 526,
529
+ "MMLU": 527,
530
+ "MMLU Knowledge Test": 528,
531
+ "MMLU-Pro": 529,
532
+ "MMR total": 530,
533
+ "MMVP": 531,
534
+ "MORPH": 532,
535
+ "MTOPDomainClassification": 533,
536
+ "MTOPIntentClassification": 534,
537
+ "MVBench": 535,
538
+ "Machine Translation": 536,
539
+ "Machine Translation (sa → en)": 537,
540
+ "Machine Translation Evaluation": 538,
541
+ "Manipulation Detection": 539,
542
+ "Market Direction Prediction": 540,
543
+ "Marketing Domain Q&A": 541,
544
+ "Masked Language Modeling": 542,
545
+ "Masked Language Modelling": 543,
546
+ "Masked Prediction (30%)": 544,
547
+ "Massive Multitask Language Understanding": 545,
548
+ "MassiveIntentClassification": 546,
549
+ "MassiveScenarioClassification": 547,
550
+ "Math": 548,
551
+ "Math Reasoning": 549,
552
+ "Math Word Problem Solving": 550,
553
+ "Math Word Problems": 551,
554
+ "Math word problems": 552,
555
+ "Mathematical Problem-Solving": 553,
556
+ "Mathematical Reasoning": 554,
557
+ "Mathematical Reasoning w/ Tools": 555,
558
+ "Mathematical problem solving": 556,
559
+ "Mathematical reasoning": 557,
560
+ "Mathematics": 558,
561
+ "Medical": 559,
562
+ "Medical Image Classification": 560,
563
+ "Medical Image Segmentation": 561,
564
+ "Medical Knowledge": 562,
565
+ "Medical Literature Search": 563,
566
+ "Medical Question Answering": 564,
567
+ "Medical SOAP Note Generation": 565,
568
+ "Medical Text Generation": 566,
569
+ "Meme Classification": 567,
570
+ "Memorization": 568,
571
+ "Military Audio Classification": 569,
572
+ "Misogyny Detection": 570,
573
+ "Misogyny Identification": 571,
574
+ "Model Compression": 572,
575
+ "Molecular Property Prediction": 573,
576
+ "Molecule Captioning": 574,
577
+ "Moment Retrieval": 575,
578
+ "Monocular Depth Estimation": 576,
579
+ "Monolingual Document Retrieval": 577,
580
+ "Morphological tagging (first subtoken)": 578,
581
+ "Motion Synthesis": 579,
582
+ "Multi Class Text Classification": 580,
583
+ "Multi Task Dev": 581,
584
+ "Multi-Head Text Regression": 582,
585
+ "Multi-Label Classification": 583,
586
+ "Multi-Label Emotion Classification": 584,
587
+ "Multi-Label Image Classification": 585,
588
+ "Multi-Label Intent Detection": 586,
589
+ "Multi-Label Text Classification": 587,
590
+ "Multi-Modal Hate Speech Detection": 588,
591
+ "Multi-Object Tracking": 589,
592
+ "Multi-Person Pose Estimation": 590,
593
+ "Multi-Source Reasoning (MUSR)": 591,
594
+ "Multi-class Classification": 592,
595
+ "Multi-class Text Classification": 593,
596
+ "Multi-label Emotion Classification": 594,
597
+ "Multi-label Fine-Grained Emotion Classification": 595,
598
+ "Multi-label Text Classification": 596,
599
+ "Multi-task language understanding": 597,
600
+ "Multi-tissue Nucleus Segmentation": 598,
601
+ "Multi-turn conversation": 599,
602
+ "Multi-turn conversation quality": 600,
603
+ "Multilabel Text Classification": 601,
604
+ "MultilabelClassification": 602,
605
+ "Multilingual": 603,
606
+ "Multilingual Emotion Classification": 604,
607
+ "Multilingual Math (MGSM)": 605,
608
+ "Multilingual QA": 606,
609
+ "Multilingual Retrieval": 607,
610
+ "Multilingual VLN": 608,
611
+ "Multimodal Code Generation": 609,
612
+ "Multimodal Emotion Recognition": 610,
613
+ "Multimodal Reasoning": 611,
614
+ "Multimodal medical knowledge and reasoning": 612,
615
+ "Multiple Choice": 613,
616
+ "Multiple Choice Question Answering": 614,
617
+ "Multiple Choice Question Generation": 615,
618
+ "Multiple Object Tracking": 616,
619
+ "Multiple-choice": 617,
620
+ "Multi‑Label Music Note Prediction": 618,
621
+ "Music Auto-Tagging": 619,
622
+ "Music Question Answering": 620,
623
+ "Music Source Separation": 621,
624
+ "Music Transcription": 622,
625
+ "My Binary Classification": 623,
626
+ "NER": 624,
627
+ "NER (9 tags)": 625,
628
+ "NER F1 Score": 626,
629
+ "NFCorpus": 627,
630
+ "NSFW/explicit content": 628,
631
+ "Named Entity Recognition": 629,
632
+ "Named Entity Recognition (Invoices)": 630,
633
+ "Named Entity Recognition (NER)": 631,
634
+ "Nano BEIR": 632,
635
+ "Narrative Genre Classification": 633,
636
+ "NatQs": 634,
637
+ "Natural Language Inference": 635,
638
+ "Natural Language Queries": 636,
639
+ "Natural Language Understanding": 637,
640
+ "Natural Language Visual Grounding": 638,
641
+ "Natural Language to Bash Translation": 639,
642
+ "Natural Lenguage Inference": 640,
643
+ "Natural language inference": 641,
644
+ "Negative Binomial GLM Parameter Estimation": 642,
645
+ "Nep-gLUE": 643,
646
+ "Nepali Speech Recognition": 644,
647
+ "Ner": 645,
648
+ "Network Pruning": 646,
649
+ "Neural Architecture Search": 647,
650
+ "News Classification": 648,
651
+ "News Summarization": 649,
652
+ "Node Classification": 650,
653
+ "Non-thinking": 651,
654
+ "OBQA": 652,
655
+ "OCR": 653,
656
+ "OMNI Math": 654,
657
+ "Object Categorization": 655,
658
+ "Object Counting": 656,
659
+ "Object Detection": 657,
660
+ "Object Localization": 658,
661
+ "Object Navigation": 659,
662
+ "Object Rearrangement": 660,
663
+ "Object Recognition": 661,
664
+ "Object Tracking": 662,
665
+ "Object visual presence verification": 663,
666
+ "Object-Oriented Navigation": 664,
667
+ "Online Beat Tracking": 665,
668
+ "Open Information Extraction": 666,
669
+ "Open Vocabulary Object Detection": 667,
670
+ "Open Vocabulary Panoptic Segmentation": 668,
671
+ "Open Vocabulary Semantic Segmentation": 669,
672
+ "Open-Domain Question Answering": 670,
673
+ "OpenAI Gym": 671,
674
+ "OpenAPI code completion": 672,
675
+ "OpenBookQA Facts": 673,
676
+ "Optical Character Recognition": 674,
677
+ "Optical Character Recognition (OCR)": 675,
678
+ "Optical Flow Estimation": 676,
679
+ "OrangeSum": 677,
680
+ "Osteoporosis Risk Prediction": 678,
681
+ "Out-of-Distribution Detection": 679,
682
+ "PDF-to-JSON Lab Test Data Conversion": 680,
683
+ "PII Masking": 681,
684
+ "PII Masking and Classification": 682,
685
+ "PII Routing": 683,
686
+ "PIQA": 684,
687
+ "PIQA Problem Solving": 685,
688
+ "POS": 686,
689
+ "POS Tagging": 687,
690
+ "Pair Classification": 688,
691
+ "PairClassification": 689,
692
+ "Pairwise Preference Ranking": 690,
693
+ "Panoptic Segmentation": 691,
694
+ "Paraphrase Detection": 692,
695
+ "Paraphrase Identification": 693,
696
+ "Paraphrase Mining": 694,
697
+ "Parking Space Occupancy": 695,
698
+ "Part of Speech Tagging": 696,
699
+ "Part-aware Panoptic Segmentation": 697,
700
+ "Part-of-Speech Tagging": 698,
701
+ "Participant Intervention Comparison Outcome Extraction": 699,
702
+ "Passage Ranking": 700,
703
+ "Passage Reranking": 701,
704
+ "Passage Retrieval": 702,
705
+ "Path Reconstruction": 703,
706
+ "Pedestrian Detection": 704,
707
+ "Perception Test": 705,
708
+ "Person Identification": 706,
709
+ "Person Re-Identification": 707,
710
+ "Personalized Image Generation": 708,
711
+ "Personalized Segmentation": 709,
712
+ "Phoneme Recognition": 710,
713
+ "Phrase Grounding": 711,
714
+ "PiQA": 712,
715
+ "Pick and Place": 713,
716
+ "Pitch Angle Tracking Control": 714,
717
+ "Planetary Recognition Lattice": 715,
718
+ "Plant Disease Classification": 716,
719
+ "Poems Annotation Generation": 717,
720
+ "Point Cloud Classification": 718,
721
+ "Point Cloud Segmentation": 719,
722
+ "Point Clouds": 720,
723
+ "Popular aggregated benchmark": 721,
724
+ "Pose Estimation": 722,
725
+ "Potato Late Blight Risk Classification": 723,
726
+ "Product Category Classification": 724,
727
+ "Professional Law": 725,
728
+ "Program synthesis": 726,
729
+ "Prompt Engineering": 727,
730
+ "Prompt Generation (Dev)": 728,
731
+ "Prompt Generation (Test)": 729,
732
+ "Prompt Harmfulness Classification": 730,
733
+ "Prompt Injection Detection": 731,
734
+ "Prompt Safety Classification": 732,
735
+ "Prompt injection detection": 733,
736
+ "Protein Design": 734,
737
+ "Protein Function Prediction": 735,
738
+ "Protein Secondary Structure Prediction": 736,
739
+ "Protein Structure Prediction": 737,
740
+ "Protocol Quality Assessment": 738,
741
+ "PubMedQA": 739,
742
+ "Py Late Information Retrieval": 740,
743
+ "PyTest edge-case unit test generation": 741,
744
+ "PyTest unit test generation": 742,
745
+ "Python Code Synthesis": 743,
746
+ "Python code generation": 744,
747
+ "QA": 745,
748
+ "QA (Span Extraction)": 746,
749
+ "QA (ViquiQuAD)": 747,
750
+ "QA (XQuAD)": 748,
751
+ "Quantization": 749,
752
+ "Question Answering": 750,
753
+ "Question Answering Classification": 751,
754
+ "Question Duplicate Detection": 752,
755
+ "Question Generation": 753,
756
+ "Question Pair Duplicate Detection": 754,
757
+ "Question-Answering": 755,
758
+ "RBC Shape Classification": 756,
759
+ "RE": 757,
760
+ "ROUGE-1": 758,
761
+ "RPG Art Generation": 759,
762
+ "RST-Pointer": 760,
763
+ "RZTKInformation Retrieval": 761,
764
+ "Radiology Document Retrieval": 762,
765
+ "Ranking": 763,
766
+ "Re-writing": 764,
767
+ "Reading Comprehension": 765,
768
+ "Reasoning": 766,
769
+ "Reasoning Quality Classification": 767,
770
+ "Receipt Entity Extraction": 768,
771
+ "Recognizing Emotion Cause in Conversations": 769,
772
+ "Referring Expression Grounding": 770,
773
+ "Referring Expression Segmentation": 771,
774
+ "Refusal Detection": 772,
775
+ "Region (5 classes)": 773,
776
+ "Region of interest detection": 774,
777
+ "Regression": 775,
778
+ "Regression (RMSE)": 776,
779
+ "Regulation Retrieval": 777,
780
+ "Regulatory Classification": 778,
781
+ "Regulatory Guidance": 779,
782
+ "Reinforcement Learning": 780,
783
+ "Reinforcement Learning Teaching": 781,
784
+ "Relation Classification": 782,
785
+ "Relation Extraction": 783,
786
+ "Relation Mapping": 784,
787
+ "Remote Sensing Image Classification": 785,
788
+ "Representation Learning": 786,
789
+ "Requirement Classification": 787,
790
+ "Reranking": 788,
791
+ "Reranking (query–product relevance)": 789,
792
+ "Response Generation": 790,
793
+ "Response Harmfulness Classification": 791,
794
+ "Resume Classification": 792,
795
+ "Retinal Vessel Segmentation": 793,
796
+ "Retrieval": 794,
797
+ "Reward Hack Detection": 795,
798
+ "Reward Modeling": 796,
799
+ "Risk Tolerance (Low/Medium/High)": 797,
800
+ "Robot Control": 798,
801
+ "Robot Manipulation": 799,
802
+ "Robotic Manipulation": 800,
803
+ "Robustness Tests": 801,
804
+ "Role-Aware Multi-Label Abuse Pattern Detection": 802,
805
+ "S2TT": 803,
806
+ "SENTS": 804,
807
+ "SICK-R": 805,
808
+ "SIQA": 806,
809
+ "SQuAD": 807,
810
+ "STEM": 808,
811
+ "STS": 809,
812
+ "STS Benchmark": 810,
813
+ "STS-ca": 811,
814
+ "STSBenchmark": 812,
815
+ "Safety & Compliance": 813,
816
+ "Sarcasm Detection": 814,
817
+ "Scene Change Detection": 815,
818
+ "Scene Classification": 816,
819
+ "Scene Flow Estimation": 817,
820
+ "Scene Segmentation": 818,
821
+ "Scene Text Recognition": 819,
822
+ "Scientific text generation": 820,
823
+ "Secret Detection": 821,
824
+ "Secret Detection (Long Context)": 822,
825
+ "Segmentation": 823,
826
+ "Segmentation Tasks": 824,
827
+ "Self-Supervised Learning": 825,
828
+ "Semantic Evidence Filtering": 826,
829
+ "Semantic Parsing": 827,
830
+ "Semantic Retrieval": 828,
831
+ "Semantic Search": 829,
832
+ "Semantic Segmentation": 830,
833
+ "Semantic Similarity": 831,
834
+ "Semantic Similarity (STS Validation)": 832,
835
+ "Semantic Textual Similarity": 833,
836
+ "Semantic Textual Similarity (Azerbaijani)": 834,
837
+ "Semantic entity labeling": 835,
838
+ "Semi-Supervised Image Classification": 836,
839
+ "Semi-Supervised Instance Segmentation": 837,
840
+ "Semi-Supervised Video Object Segmentation": 838,
841
+ "Sentence Classification": 839,
842
+ "Sentence Completion": 840,
843
+ "Sentence Ordering": 841,
844
+ "Sentence Relevance Classification": 842,
845
+ "Sentence Similarity": 843,
846
+ "Sentence completion": 844,
847
+ "Sentence-Embedding": 845,
848
+ "Sentic-GCN": 846,
849
+ "Sentic-GCN Bert": 847,
850
+ "Sentiment Analysis": 848,
851
+ "Sentiment Analysis (Regression)": 849,
852
+ "Sentiment Classification": 850,
853
+ "Sentiment classification": 851,
854
+ "Sequence Classification": 852,
855
+ "Sequence Labeling": 853,
856
+ "Sequence-to-sequence Language Modeling": 854,
857
+ "ShaderEval": 855,
858
+ "Short-term Object Interaction Anticipation": 856,
859
+ "Sign Language Recognition": 857,
860
+ "Silhouette": 858,
861
+ "Single Choice Question": 859,
862
+ "Single-object discovery": 860,
863
+ "Skill Level (Low/Medium/High)": 861,
864
+ "Skin Tumor Classification": 862,
865
+ "Slot Filling": 863,
866
+ "Solubility": 864,
867
+ "Solving Partial Differential Equations": 865,
868
+ "Space-time Video Super-resolution": 866,
869
+ "Spam / Ham Classification": 867,
870
+ "Spam Detection": 868,
871
+ "Spam Review Detection": 869,
872
+ "Span-Extraction": 870,
873
+ "Sparse Binary Classification": 871,
874
+ "Sparse Information Retrieval": 872,
875
+ "Sparse Learning": 873,
876
+ "Sparse Nano BEIR": 874,
877
+ "Spatial Reasoning": 875,
878
+ "Speaker Diarization": 876,
879
+ "Speaker Identification": 877,
880
+ "Speaker Recognition": 878,
881
+ "Speaker Verification": 879,
882
+ "Specialized Capabilities": 880,
883
+ "Speech Emotion Recognition": 881,
884
+ "Speech Enhancement": 882,
885
+ "Speech Recognition": 883,
886
+ "Speech Separation": 884,
887
+ "Speech Synthesis": 885,
888
+ "Speech Translation": 886,
889
+ "Speech Translation (ML→EN)": 887,
890
+ "Speech-to-Phoneme": 888,
891
+ "Speech-to-Speech Translation": 889,
892
+ "Speech-to-Text": 890,
893
+ "Speech-to-Text Translation": 891,
894
+ "Speed": 892,
895
+ "Spoken Command Recognition": 893,
896
+ "Spoken Language Understanding": 894,
897
+ "Stance Classification": 895,
898
+ "StarCraft Multi-Agent Challenge v2": 896,
899
+ "Stereo Depth Estimation": 897,
900
+ "Stereo Disparity Estimation": 898,
901
+ "Stereotypical Bias Analysis": 899,
902
+ "Stock Market Prediction": 900,
903
+ "Stock Trading": 901,
904
+ "Story Continuation": 902,
905
+ "Story Point Estimation": 903,
906
+ "Strategy QA (internal heuristic eval)": 904,
907
+ "Strong Gravitational Lens Discovery": 905,
908
+ "Style classification (holdout)": 906,
909
+ "Style classification (real-world baseline)": 907,
910
+ "Subjectivity Analysis": 908,
911
+ "Subjectivity Detection": 909,
912
+ "Suggestive Content Detection": 910,
913
+ "Suicidal Tendency Prediction in text": 911,
914
+ "Suicide Risk Detection": 912,
915
+ "Summarization": 913,
916
+ "Super Resolution": 914,
917
+ "Surgical Triplet Recognition": 915,
918
+ "Syriac Vocalization": 916,
919
+ "TAG": 917,
920
+ "TC": 918,
921
+ "TEca": 919,
922
+ "TOON conversion (schema-driven extraction)": 920,
923
+ "TabFQuAD": 921,
924
+ "Table Detection": 922,
925
+ "Table-to-Text Generation": 923,
926
+ "Tabular Classification": 924,
927
+ "Tabular Regression": 925,
928
+ "Target Prioritization": 926,
929
+ "TeCla": 927,
930
+ "Temporal Action Localization": 928,
931
+ "Temporal Relation Extraction": 929,
932
+ "Temporal Sentence Grounding": 930,
933
+ "Text Classification": 931,
934
+ "Text Classification (Sentiment Analysis)": 932,
935
+ "Text Classification (multi-label emotions)": 933,
936
+ "Text Classification Denial": 934,
937
+ "Text Classification Question": 935,
938
+ "Text Clustering": 936,
939
+ "Text Detection": 937,
940
+ "Text Generation": 938,
941
+ "Text Generation (Field Normalization)": 939,
942
+ "Text Generation (In-Domain)": 940,
943
+ "Text Generation (Out-of-Domain)": 941,
944
+ "Text Regression": 942,
945
+ "Text Retrieval": 943,
946
+ "Text Simplification": 944,
947
+ "Text Summarization": 945,
948
+ "Text To Speech": 946,
949
+ "Text Tokenization": 947,
950
+ "Text classification": 948,
951
+ "Text generation": 949,
952
+ "Text to 3D": 950,
953
+ "Text to Audio Retrieval": 951,
954
+ "Text to Molecular Generation": 952,
955
+ "Text to SQL": 953,
956
+ "Text to Speech": 954,
957
+ "Text-To-SQL": 955,
958
+ "Text-To-Speech Synthesis": 956,
959
+ "Text-based de novo Molecule Generation": 957,
960
+ "Text-classification": 958,
961
+ "Text-to-Image Generation": 959,
962
+ "Text-to-Music Generation": 960,
963
+ "Text-to-Speech": 961,
964
+ "Text-to-Video Generation": 962,
965
+ "Text2Text Generation": 963,
966
+ "The Semantic Segmentation Of Remote Sensing Imagery": 964,
967
+ "Theory of Mind": 965,
968
+ "Thinking": 966,
969
+ "Time Series Forecasting": 967,
970
+ "TinyQA Benchmark++": 968,
971
+ "Token Classification": 969,
972
+ "Token classification": 970,
973
+ "Tomato": 971,
974
+ "Tool Use": 972,
975
+ "Topic Classification": 973,
976
+ "Toxic-detector-cnn": 974,
977
+ "Toxic-detector-rnn": 975,
978
+ "Toxic-detector-roberta": 976,
979
+ "Toxicity (12 tasks)": 977,
980
+ "Toxicity Detection": 978,
981
+ "Track classification": 979,
982
+ "Trading": 980,
983
+ "Traffic Prediction": 981,
984
+ "Training-free 3D Part Segmentation": 982,
985
+ "Training-free 3D Point Cloud Classification": 983,
986
+ "Transit Route Planning": 984,
987
+ "Translation": 985,
988
+ "Translation (de-en)": 986,
989
+ "Translation En-to-ES": 987,
990
+ "Translation English-to-Swahili": 988,
991
+ "Translation Quality Estimation": 989,
992
+ "Translation acm-deu": 990,
993
+ "Translation acm-eng": 991,
994
+ "Translation acm-fra": 992,
995
+ "Translation acm-por": 993,
996
+ "Translation acm-spa": 994,
997
+ "Translation afr-deu": 995,
998
+ "Translation afr-eng": 996,
999
+ "Translation afr-fra": 997,
1000
+ "Translation afr-nld": 998,
1001
+ "Translation afr-por": 999,
1002
+ "Translation afr-spa": 1000,
1003
+ "Translation amh-deu": 1001,
1004
+ "Translation amh-eng": 1002,
1005
+ "Translation amh-fra": 1003,
1006
+ "Translation amh-por": 1004,
1007
+ "Translation amh-spa": 1005,
1008
+ "Translation apc-deu": 1006,
1009
+ "Translation apc-eng": 1007,
1010
+ "Translation apc-fra": 1008,
1011
+ "Translation apc-por": 1009,
1012
+ "Translation apc-spa": 1010,
1013
+ "Translation ara-cat": 1011,
1014
+ "Translation ara-dan": 1012,
1015
+ "Translation ara-deu": 1013,
1016
+ "Translation ara-eng": 1014,
1017
+ "Translation ara-fra": 1015,
1018
+ "Translation ara-glg": 1016,
1019
+ "Translation ara-ita": 1017,
1020
+ "Translation ara-nob": 1018,
1021
+ "Translation ara-por": 1019,
1022
+ "Translation ara-ron": 1020,
1023
+ "Translation ara-spa": 1021,
1024
+ "Translation ara-swe": 1022,
1025
+ "Translation arb-eng": 1023,
1026
+ "Translation arz-deu": 1024,
1027
+ "Translation arz-eng": 1025,
1028
+ "Translation arz-fra": 1026,
1029
+ "Translation arz-por": 1027,
1030
+ "Translation arz-spa": 1028,
1031
+ "Translation asm-eng": 1029,
1032
+ "Translation asm-fra": 1030,
1033
+ "Translation asm-por": 1031,
1034
+ "Translation ast-cat": 1032,
1035
+ "Translation ast-deu": 1033,
1036
+ "Translation ast-eng": 1034,
1037
+ "Translation ast-fra": 1035,
1038
+ "Translation ast-glg": 1036,
1039
+ "Translation ast-ita": 1037,
1040
+ "Translation ast-oci": 1038,
1041
+ "Translation ast-por": 1039,
1042
+ "Translation ast-ron": 1040,
1043
+ "Translation ast-spa": 1041,
1044
+ "Translation awa-deu": 1042,
1045
+ "Translation awa-eng": 1043,
1046
+ "Translation awa-fra": 1044,
1047
+ "Translation awa-por": 1045,
1048
+ "Translation awa-spa": 1046,
1049
+ "Translation aze_Latn-deu": 1047,
1050
+ "Translation aze_Latn-eng": 1048,
1051
+ "Translation aze_Latn-fra": 1049,
1052
+ "Translation aze_Latn-por": 1050,
1053
+ "Translation aze_Latn-spa": 1051,
1054
+ "Translation bak-eng": 1052,
1055
+ "Translation ban-eng": 1053,
1056
+ "Translation ban-fra": 1054,
1057
+ "Translation ban-por": 1055,
1058
+ "Translation bar-bar": 1056,
1059
+ "Translation bel-cat": 1057,
1060
+ "Translation bel-deu": 1058,
1061
+ "Translation bel-eng": 1059,
1062
+ "Translation bel-fra": 1060,
1063
+ "Translation bel-glg": 1061,
1064
+ "Translation bel-ita": 1062,
1065
+ "Translation bel-pol": 1063,
1066
+ "Translation bel-por": 1064,
1067
+ "Translation bel-ron": 1065,
1068
+ "Translation bel-rus": 1066,
1069
+ "Translation bel-spa": 1067,
1070
+ "Translation bel-ukr": 1068,
1071
+ "Translation bem-eng": 1069,
1072
+ "Translation bem-fra": 1070,
1073
+ "Translation bem-por": 1071,
1074
+ "Translation bem-spa": 1072,
1075
+ "Translation ben-deu": 1073,
1076
+ "Translation ben-eng": 1074,
1077
+ "Translation ben-fra": 1075,
1078
+ "Translation ben-por": 1076,
1079
+ "Translation ben-spa": 1077,
1080
+ "Translation bho-deu": 1078,
1081
+ "Translation bho-eng": 1079,
1082
+ "Translation bho-fra": 1080,
1083
+ "Translation bho-por": 1081,
1084
+ "Translation bho-spa": 1082,
1085
+ "Translation bos_Latn-eng": 1083,
1086
+ "Translation bre-eng": 1084,
1087
+ "Translation bre-fra": 1085,
1088
+ "Translation bul-deu": 1086,
1089
+ "Translation bul-eng": 1087,
1090
+ "Translation bul-fra": 1088,
1091
+ "Translation bul-ita": 1089,
1092
+ "Translation bul-por": 1090,
1093
+ "Translation bul-ron": 1091,
1094
+ "Translation bul-rus": 1092,
1095
+ "Translation bul-spa": 1093,
1096
+ "Translation bul-ukr": 1094,
1097
+ "Translation cat-ara": 1095,
1098
+ "Translation cat-ast": 1096,
1099
+ "Translation cat-deu": 1097,
1100
+ "Translation cat-eng": 1098,
1101
+ "Translation cat-fra": 1099,
1102
+ "Translation cat-glg": 1100,
1103
+ "Translation cat-heb": 1101,
1104
+ "Translation cat-ita": 1102,
1105
+ "Translation cat-lav": 1103,
1106
+ "Translation cat-lit": 1104,
1107
+ "Translation cat-oci": 1105,
1108
+ "Translation cat-por": 1106,
1109
+ "Translation cat-ron": 1107,
1110
+ "Translation cat-spa": 1108,
1111
+ "Translation cat-tur": 1109,
1112
+ "Translation ceb-deu": 1110,
1113
+ "Translation ceb-eng": 1111,
1114
+ "Translation ceb-fra": 1112,
1115
+ "Translation ceb-por": 1113,
1116
+ "Translation ceb-spa": 1114,
1117
+ "Translation ces-deu": 1115,
1118
+ "Translation ces-eng": 1116,
1119
+ "Translation ces-fra": 1117,
1120
+ "Translation ces-por": 1118,
1121
+ "Translation ces-rus": 1119,
1122
+ "Translation ces-spa": 1120,
1123
+ "Translation ces-ukr": 1121,
1124
+ "Translation ckb-deu": 1122,
1125
+ "Translation ckb-eng": 1123,
1126
+ "Translation ckb-fra": 1124,
1127
+ "Translation ckb-por": 1125,
1128
+ "Translation ckb-spa": 1126,
1129
+ "Translation cmn_Hans-eng": 1127,
1130
+ "Translation cmn_Hans-fra": 1128,
1131
+ "Translation cmn_Hans-por": 1129,
1132
+ "Translation cmn_Hans-spa": 1130,
1133
+ "Translation cmn_Hant-eng": 1131,
1134
+ "Translation cmn_Hant-fra": 1132,
1135
+ "Translation cmn_Hant-por": 1133,
1136
+ "Translation cmn_Hant-spa": 1134,
1137
+ "Translation crh-deu": 1135,
1138
+ "Translation crh-eng": 1136,
1139
+ "Translation crh-fra": 1137,
1140
+ "Translation crh-por": 1138,
1141
+ "Translation crh-spa": 1139,
1142
+ "Translation cym-deu": 1140,
1143
+ "Translation cym-eng": 1141,
1144
+ "Translation cym-fra": 1142,
1145
+ "Translation cym-por": 1143,
1146
+ "Translation cym-spa": 1144,
1147
+ "Translation dan-ara": 1145,
1148
+ "Translation dan-cat": 1146,
1149
+ "Translation dan-ces": 1147,
1150
+ "Translation dan-deu": 1148,
1151
+ "Translation dan-eng": 1149,
1152
+ "Translation dan-fra": 1150,
1153
+ "Translation dan-glg": 1151,
1154
+ "Translation dan-heb": 1152,
1155
+ "Translation dan-isl": 1153,
1156
+ "Translation dan-ita": 1154,
1157
+ "Translation dan-nob": 1155,
1158
+ "Translation dan-pol": 1156,
1159
+ "Translation dan-por": 1157,
1160
+ "Translation dan-ron": 1158,
1161
+ "Translation dan-rus": 1159,
1162
+ "Translation dan-spa": 1160,
1163
+ "Translation dan-swe": 1161,
1164
+ "Translation dan-tur": 1162,
1165
+ "Translation dan-ukr": 1163,
1166
+ "Translation deu-afr": 1164,
1167
+ "Translation deu-ara": 1165,
1168
+ "Translation deu-ast": 1166,
1169
+ "Translation deu-bel": 1167,
1170
+ "Translation deu-ben": 1168,
1171
+ "Translation deu-bul": 1169,
1172
+ "Translation deu-cat": 1170,
1173
+ "Translation deu-ces": 1171,
1174
+ "Translation deu-cym": 1172,
1175
+ "Translation deu-dan": 1173,
1176
+ "Translation deu-deu": 1174,
1177
+ "Translation deu-ell": 1175,
1178
+ "Translation deu-eng": 1176,
1179
+ "Translation deu-est": 1177,
1180
+ "Translation deu-fao": 1178,
1181
+ "Translation deu-fas": 1179,
1182
+ "Translation deu-fin": 1180,
1183
+ "Translation deu-fra": 1181,
1184
+ "Translation deu-fur": 1182,
1185
+ "Translation deu-gle": 1183,
1186
+ "Translation deu-glg": 1184,
1187
+ "Translation deu-guj": 1185,
1188
+ "Translation deu-hat": 1186,
1189
+ "Translation deu-hau": 1187,
1190
+ "Translation deu-heb": 1188,
1191
+ "Translation deu-hin": 1189,
1192
+ "Translation deu-hne": 1190,
1193
+ "Translation deu-hrv": 1191,
1194
+ "Translation deu-hun": 1192,
1195
+ "Translation deu-isl": 1193,
1196
+ "Translation deu-ita": 1194,
1197
+ "Translation deu-lad": 1195,
1198
+ "Translation deu-lav": 1196,
1199
+ "Translation deu-lij": 1197,
1200
+ "Translation deu-lit": 1198,
1201
+ "Translation deu-ltz": 1199,
1202
+ "Translation deu-mag": 1200,
1203
+ "Translation deu-mkd": 1201,
1204
+ "Translation deu-mlt": 1202,
1205
+ "Translation deu-nds": 1203,
1206
+ "Translation deu-nld": 1204,
1207
+ "Translation deu-nno": 1205,
1208
+ "Translation deu-nob": 1206,
1209
+ "Translation deu-nor": 1207,
1210
+ "Translation deu-oci": 1208,
1211
+ "Translation deu-pan": 1209,
1212
+ "Translation deu-pap": 1210,
1213
+ "Translation deu-pes": 1211,
1214
+ "Translation deu-pol": 1212,
1215
+ "Translation deu-por": 1213,
1216
+ "Translation deu-prs": 1214,
1217
+ "Translation deu-ron": 1215,
1218
+ "Translation deu-rus": 1216,
1219
+ "Translation deu-slk": 1217,
1220
+ "Translation deu-slv": 1218,
1221
+ "Translation deu-spa": 1219,
1222
+ "Translation deu-sqi": 1220,
1223
+ "Translation deu-srd": 1221,
1224
+ "Translation deu-srp_Cyrl": 1222,
1225
+ "Translation deu-swa": 1223,
1226
+ "Translation deu-swe": 1224,
1227
+ "Translation deu-tgk": 1225,
1228
+ "Translation deu-tpi": 1226,
1229
+ "Translation deu-tsn": 1227,
1230
+ "Translation deu-ukr": 1228,
1231
+ "Translation deu-urd": 1229,
1232
+ "Translation deu-vie": 1230,
1233
+ "Translation drt-deu": 1231,
1234
+ "Translation drt-eng": 1232,
1235
+ "Translation drt-fry": 1233,
1236
+ "Translation drt-nld": 1234,
1237
+ "Translation dsb-deu": 1235,
1238
+ "Translation ell-deu": 1236,
1239
+ "Translation ell-eng": 1237,
1240
+ "Translation ell-fra": 1238,
1241
+ "Translation ell-por": 1239,
1242
+ "Translation ell-spa": 1240,
1243
+ "Translation en-ru": 1241,
1244
+ "Translation eng-afr": 1242,
1245
+ "Translation eng-ara": 1243,
1246
+ "Translation eng-arz": 1244,
1247
+ "Translation eng-ast": 1245,
1248
+ "Translation eng-bel": 1246,
1249
+ "Translation eng-ben": 1247,
1250
+ "Translation eng-bho": 1248,
1251
+ "Translation eng-bos_Latn": 1249,
1252
+ "Translation eng-bul": 1250,
1253
+ "Translation eng-cat": 1251,
1254
+ "Translation eng-ces": 1252,
1255
+ "Translation eng-cym": 1253,
1256
+ "Translation eng-dan": 1254,
1257
+ "Translation eng-deu": 1255,
1258
+ "Translation eng-ell": 1256,
1259
+ "Translation eng-eng": 1257,
1260
+ "Translation eng-est": 1258,
1261
+ "Translation eng-fao": 1259,
1262
+ "Translation eng-fas": 1260,
1263
+ "Translation eng-fin": 1261,
1264
+ "Translation eng-fra": 1262,
1265
+ "Translation eng-fry": 1263,
1266
+ "Translation eng-fur": 1264,
1267
+ "Translation eng-gla": 1265,
1268
+ "Translation eng-gle": 1266,
1269
+ "Translation eng-glg": 1267,
1270
+ "Translation eng-guj": 1268,
1271
+ "Translation eng-hat": 1269,
1272
+ "Translation eng-hau": 1270,
1273
+ "Translation eng-hbs": 1271,
1274
+ "Translation eng-heb": 1272,
1275
+ "Translation eng-hin": 1273,
1276
+ "Translation eng-hne": 1274,
1277
+ "Translation eng-hrv": 1275,
1278
+ "Translation eng-hun": 1276,
1279
+ "Translation eng-ind": 1277,
1280
+ "Translation eng-isl": 1278,
1281
+ "Translation eng-ita": 1279,
1282
+ "Translation eng-jpg": 1280,
1283
+ "Translation eng-jpn": 1281,
1284
+ "Translation eng-kea": 1282,
1285
+ "Translation eng-kin": 1283,
1286
+ "Translation eng-kor": 1284,
1287
+ "Translation eng-lad": 1285,
1288
+ "Translation eng-lad_Latn": 1286,
1289
+ "Translation eng-lat": 1287,
1290
+ "Translation eng-lav": 1288,
1291
+ "Translation eng-lij": 1289,
1292
+ "Translation eng-lin": 1290,
1293
+ "Translation eng-lit": 1291,
1294
+ "Translation eng-ltz": 1292,
1295
+ "Translation eng-lug": 1293,
1296
+ "Translation eng-mag": 1294,
1297
+ "Translation eng-mai": 1295,
1298
+ "Translation eng-mar": 1296,
1299
+ "Translation eng-mkd": 1297,
1300
+ "Translation eng-mld": 1298,
1301
+ "Translation eng-mlt": 1299,
1302
+ "Translation eng-nds": 1300,
1303
+ "Translation eng-nep": 1301,
1304
+ "Translation eng-nld": 1302,
1305
+ "Translation eng-nno": 1303,
1306
+ "Translation eng-nob": 1304,
1307
+ "Translation eng-nor": 1305,
1308
+ "Translation eng-nso": 1306,
1309
+ "Translation eng-nya": 1307,
1310
+ "Translation eng-oci": 1308,
1311
+ "Translation eng-pan": 1309,
1312
+ "Translation eng-pap": 1310,
1313
+ "Translation eng-pes": 1311,
1314
+ "Translation eng-pol": 1312,
1315
+ "Translation eng-por": 1313,
1316
+ "Translation eng-prs": 1314,
1317
+ "Translation eng-pus": 1315,
1318
+ "Translation eng-ron": 1316,
1319
+ "Translation eng-rus": 1317,
1320
+ "Translation eng-sco": 1318,
1321
+ "Translation eng-sin": 1319,
1322
+ "Translation eng-slk": 1320,
1323
+ "Translation eng-slv": 1321,
1324
+ "Translation eng-sna": 1322,
1325
+ "Translation eng-som": 1323,
1326
+ "Translation eng-sot": 1324,
1327
+ "Translation eng-spa": 1325,
1328
+ "Translation eng-sqi": 1326,
1329
+ "Translation eng-srd": 1327,
1330
+ "Translation eng-srn": 1328,
1331
+ "Translation eng-srp_Cyrl": 1329,
1332
+ "Translation eng-srp_Latn": 1330,
1333
+ "Translation eng-swa": 1331,
1334
+ "Translation eng-swe": 1332,
1335
+ "Translation eng-tgk": 1333,
1336
+ "Translation eng-tgk_Cyrl": 1334,
1337
+ "Translation eng-tha": 1335,
1338
+ "Translation eng-tpi": 1336,
1339
+ "Translation eng-tsn": 1337,
1340
+ "Translation eng-tso": 1338,
1341
+ "Translation eng-tur": 1339,
1342
+ "Translation eng-ukr": 1340,
1343
+ "Translation eng-urd": 1341,
1344
+ "Translation eng-vie": 1342,
1345
+ "Translation eng-xho": 1343,
1346
+ "Translation eng-zho": 1344,
1347
+ "Translation eng-zul": 1345,
1348
+ "Translation enm-deu": 1346,
1349
+ "Translation enm-eng": 1347,
1350
+ "Translation enm-fry": 1348,
1351
+ "Translation enm-ltz": 1349,
1352
+ "Translation enm-nld": 1350,
1353
+ "Translation epo-deu": 1351,
1354
+ "Translation epo-eng": 1352,
1355
+ "Translation epo-fra": 1353,
1356
+ "Translation epo-por": 1354,
1357
+ "Translation epo-spa": 1355,
1358
+ "Translation est-deu": 1356,
1359
+ "Translation est-eng": 1357,
1360
+ "Translation est-fra": 1358,
1361
+ "Translation est-por": 1359,
1362
+ "Translation est-spa": 1360,
1363
+ "Translation eus-deu": 1361,
1364
+ "Translation eus-eng": 1362,
1365
+ "Translation eus-fra": 1363,
1366
+ "Translation eus-por": 1364,
1367
+ "Translation eus-spa": 1365,
1368
+ "Translation fao-deu": 1366,
1369
+ "Translation fao-eng": 1367,
1370
+ "Translation fao-fra": 1368,
1371
+ "Translation fao-por": 1369,
1372
+ "Translation fao-spa": 1370,
1373
+ "Translation fas-dan": 1371,
1374
+ "Translation fas-deu": 1372,
1375
+ "Translation fas-eng": 1373,
1376
+ "Translation fas-fra": 1374,
1377
+ "Translation fas-ita": 1375,
1378
+ "Translation fas-por": 1376,
1379
+ "Translation fas-ron": 1377,
1380
+ "Translation fas-spa": 1378,
1381
+ "Translation fij-eng": 1379,
1382
+ "Translation fil-deu": 1380,
1383
+ "Translation fil-eng": 1381,
1384
+ "Translation fil-fra": 1382,
1385
+ "Translation fil-por": 1383,
1386
+ "Translation fil-spa": 1384,
1387
+ "Translation fin-bul": 1385,
1388
+ "Translation fin-deu": 1386,
1389
+ "Translation fin-eng": 1387,
1390
+ "Translation fin-fra": 1388,
1391
+ "Translation fin-hrv": 1389,
1392
+ "Translation fin-por": 1390,
1393
+ "Translation fin-rus": 1391,
1394
+ "Translation fin-slv": 1392,
1395
+ "Translation fin-spa": 1393,
1396
+ "Translation fin-srp_Cyrl": 1394,
1397
+ "Translation fin-ukr": 1395,
1398
+ "Translation fra-afr": 1396,
1399
+ "Translation fra-ara": 1397,
1400
+ "Translation fra-ast": 1398,
1401
+ "Translation fra-bel": 1399,
1402
+ "Translation fra-ben": 1400,
1403
+ "Translation fra-bul": 1401,
1404
+ "Translation fra-cat": 1402,
1405
+ "Translation fra-ces": 1403,
1406
+ "Translation fra-cym": 1404,
1407
+ "Translation fra-dan": 1405,
1408
+ "Translation fra-deu": 1406,
1409
+ "Translation fra-ell": 1407,
1410
+ "Translation fra-eng": 1408,
1411
+ "Translation fra-est": 1409,
1412
+ "Translation fra-fao": 1410,
1413
+ "Translation fra-fas": 1411,
1414
+ "Translation fra-fin": 1412,
1415
+ "Translation fra-fra": 1413,
1416
+ "Translation fra-fur": 1414,
1417
+ "Translation fra-gle": 1415,
1418
+ "Translation fra-glg": 1416,
1419
+ "Translation fra-guj": 1417,
1420
+ "Translation fra-hat": 1418,
1421
+ "Translation fra-hau": 1419,
1422
+ "Translation fra-hbs": 1420,
1423
+ "Translation fra-heb": 1421,
1424
+ "Translation fra-hin": 1422,
1425
+ "Translation fra-hne": 1423,
1426
+ "Translation fra-hrv": 1424,
1427
+ "Translation fra-hun": 1425,
1428
+ "Translation fra-isl": 1426,
1429
+ "Translation fra-ita": 1427,
1430
+ "Translation fra-kea": 1428,
1431
+ "Translation fra-lav": 1429,
1432
+ "Translation fra-lij": 1430,
1433
+ "Translation fra-lin": 1431,
1434
+ "Translation fra-lit": 1432,
1435
+ "Translation fra-ltz": 1433,
1436
+ "Translation fra-mag": 1434,
1437
+ "Translation fra-mkd": 1435,
1438
+ "Translation fra-mlt": 1436,
1439
+ "Translation fra-nep": 1437,
1440
+ "Translation fra-nld": 1438,
1441
+ "Translation fra-nno": 1439,
1442
+ "Translation fra-nob": 1440,
1443
+ "Translation fra-nor": 1441,
1444
+ "Translation fra-oci": 1442,
1445
+ "Translation fra-pan": 1443,
1446
+ "Translation fra-pap": 1444,
1447
+ "Translation fra-pes": 1445,
1448
+ "Translation fra-pol": 1446,
1449
+ "Translation fra-por": 1447,
1450
+ "Translation fra-prs": 1448,
1451
+ "Translation fra-pus": 1449,
1452
+ "Translation fra-ron": 1450,
1453
+ "Translation fra-rus": 1451,
1454
+ "Translation fra-slk": 1452,
1455
+ "Translation fra-slv": 1453,
1456
+ "Translation fra-spa": 1454,
1457
+ "Translation fra-sqi": 1455,
1458
+ "Translation fra-srd": 1456,
1459
+ "Translation fra-srp_Cyrl": 1457,
1460
+ "Translation fra-swa": 1458,
1461
+ "Translation fra-swe": 1459,
1462
+ "Translation fra-tgk": 1460,
1463
+ "Translation fra-tpi": 1461,
1464
+ "Translation fra-tsn": 1462,
1465
+ "Translation fra-tur": 1463,
1466
+ "Translation fra-ukr": 1464,
1467
+ "Translation fra-urd": 1465,
1468
+ "Translation fra-vie": 1466,
1469
+ "Translation fry-deu": 1467,
1470
+ "Translation fry-eng": 1468,
1471
+ "Translation fry-ltz": 1469,
1472
+ "Translation fry-nld": 1470,
1473
+ "Translation fur-deu": 1471,
1474
+ "Translation fur-eng": 1472,
1475
+ "Translation fur-fra": 1473,
1476
+ "Translation fur-por": 1474,
1477
+ "Translation fur-spa": 1475,
1478
+ "Translation gla-deu": 1476,
1479
+ "Translation gla-eng": 1477,
1480
+ "Translation gla-fra": 1478,
1481
+ "Translation gla-por": 1479,
1482
+ "Translation gla-spa": 1480,
1483
+ "Translation gle-deu": 1481,
1484
+ "Translation gle-eng": 1482,
1485
+ "Translation gle-fra": 1483,
1486
+ "Translation gle-por": 1484,
1487
+ "Translation gle-spa": 1485,
1488
+ "Translation glg-ara": 1486,
1489
+ "Translation glg-ast": 1487,
1490
+ "Translation glg-cat": 1488,
1491
+ "Translation glg-deu": 1489,
1492
+ "Translation glg-eng": 1490,
1493
+ "Translation glg-fra": 1491,
1494
+ "Translation glg-heb": 1492,
1495
+ "Translation glg-ita": 1493,
1496
+ "Translation glg-lav": 1494,
1497
+ "Translation glg-lit": 1495,
1498
+ "Translation glg-oci": 1496,
1499
+ "Translation glg-por": 1497,
1500
+ "Translation glg-ron": 1498,
1501
+ "Translation glg-spa": 1499,
1502
+ "Translation glg-tur": 1500,
1503
+ "Translation gos-afr": 1501,
1504
+ "Translation gos-deu": 1502,
1505
+ "Translation gos-eng": 1503,
1506
+ "Translation gos-fry": 1504,
1507
+ "Translation gos-nld": 1505,
1508
+ "Translation grn-eng": 1506,
1509
+ "Translation grn-fra": 1507,
1510
+ "Translation grn-por": 1508,
1511
+ "Translation gsw-deu": 1509,
1512
+ "Translation gsw-eng": 1510,
1513
+ "Translation gsw-nld": 1511,
1514
+ "Translation guj-deu": 1512,
1515
+ "Translation guj-eng": 1513,
1516
+ "Translation guj-fra": 1514,
1517
+ "Translation guj-por": 1515,
1518
+ "Translation guj-spa": 1516,
1519
+ "Translation hat-deu": 1517,
1520
+ "Translation hat-eng": 1518,
1521
+ "Translation hat-fra": 1519,
1522
+ "Translation hat-por": 1520,
1523
+ "Translation hat-spa": 1521,
1524
+ "Translation hau-eng": 1522,
1525
+ "Translation hau-fra": 1523,
1526
+ "Translation hau-por": 1524,
1527
+ "Translation hau-spa": 1525,
1528
+ "Translation hbs-deu": 1526,
1529
+ "Translation hbs-eng": 1527,
1530
+ "Translation hbs-fra": 1528,
1531
+ "Translation hbs-ita": 1529,
1532
+ "Translation hbs-rus": 1530,
1533
+ "Translation hbs-spa": 1531,
1534
+ "Translation hbs-ukr": 1532,
1535
+ "Translation heb-cat": 1533,
1536
+ "Translation heb-dan": 1534,
1537
+ "Translation heb-deu": 1535,
1538
+ "Translation heb-eng": 1536,
1539
+ "Translation heb-fra": 1537,
1540
+ "Translation heb-glg": 1538,
1541
+ "Translation heb-isl": 1539,
1542
+ "Translation heb-ita": 1540,
1543
+ "Translation heb-nob": 1541,
1544
+ "Translation heb-por": 1542,
1545
+ "Translation heb-ron": 1543,
1546
+ "Translation heb-spa": 1544,
1547
+ "Translation heb-swe": 1545,
1548
+ "Translation hin-deu": 1546,
1549
+ "Translation hin-eng": 1547,
1550
+ "Translation hin-fra": 1548,
1551
+ "Translation hin-por": 1549,
1552
+ "Translation hin-spa": 1550,
1553
+ "Translation hne-deu": 1551,
1554
+ "Translation hne-eng": 1552,
1555
+ "Translation hne-fra": 1553,
1556
+ "Translation hne-por": 1554,
1557
+ "Translation hne-spa": 1555,
1558
+ "Translation hrv-deu": 1556,
1559
+ "Translation hrv-eng": 1557,
1560
+ "Translation hrv-fra": 1558,
1561
+ "Translation hrv-ita": 1559,
1562
+ "Translation hrv-por": 1560,
1563
+ "Translation hrv-ron": 1561,
1564
+ "Translation hrv-rus": 1562,
1565
+ "Translation hrv-spa": 1563,
1566
+ "Translation hrv-ukr": 1564,
1567
+ "Translation hrx-deu": 1565,
1568
+ "Translation hrx-eng": 1566,
1569
+ "Translation hsb-deu": 1567,
1570
+ "Translation hun-deu": 1568,
1571
+ "Translation hun-eng": 1569,
1572
+ "Translation hun-fra": 1570,
1573
+ "Translation hun-por": 1571,
1574
+ "Translation hun-spa": 1572,
1575
+ "Translation hun-ukr": 1573,
1576
+ "Translation hye-deu": 1574,
1577
+ "Translation hye-eng": 1575,
1578
+ "Translation hye-fra": 1576,
1579
+ "Translation hye-por": 1577,
1580
+ "Translation hye-spa": 1578,
1581
+ "Translation ibo-eng": 1579,
1582
+ "Translation ibo-fra": 1580,
1583
+ "Translation ibo-por": 1581,
1584
+ "Translation ibo-spa": 1582,
1585
+ "Translation ido_Latn-eng": 1583,
1586
+ "Translation ilo-deu": 1584,
1587
+ "Translation ilo-eng": 1585,
1588
+ "Translation ilo-fra": 1586,
1589
+ "Translation ilo-por": 1587,
1590
+ "Translation ilo-spa": 1588,
1591
+ "Translation ind-deu": 1589,
1592
+ "Translation ind-eng": 1590,
1593
+ "Translation ind-fra": 1591,
1594
+ "Translation ind-por": 1592,
1595
+ "Translation ind-spa": 1593,
1596
+ "Translation isl-cat": 1594,
1597
+ "Translation isl-ces": 1595,
1598
+ "Translation isl-dan": 1596,
1599
+ "Translation isl-deu": 1597,
1600
+ "Translation isl-eng": 1598,
1601
+ "Translation isl-fra": 1599,
1602
+ "Translation isl-glg": 1600,
1603
+ "Translation isl-heb": 1601,
1604
+ "Translation isl-ita": 1602,
1605
+ "Translation isl-nob": 1603,
1606
+ "Translation isl-pol": 1604,
1607
+ "Translation isl-por": 1605,
1608
+ "Translation isl-ron": 1606,
1609
+ "Translation isl-spa": 1607,
1610
+ "Translation isl-swe": 1608,
1611
+ "Translation ita-ara": 1609,
1612
+ "Translation ita-ast": 1610,
1613
+ "Translation ita-bel": 1611,
1614
+ "Translation ita-cat": 1612,
1615
+ "Translation ita-deu": 1613,
1616
+ "Translation ita-eng": 1614,
1617
+ "Translation ita-fra": 1615,
1618
+ "Translation ita-glg": 1616,
1619
+ "Translation ita-heb": 1617,
1620
+ "Translation ita-lav": 1618,
1621
+ "Translation ita-lit": 1619,
1622
+ "Translation ita-oci": 1620,
1623
+ "Translation ita-por": 1621,
1624
+ "Translation ita-ron": 1622,
1625
+ "Translation ita-rus": 1623,
1626
+ "Translation ita-spa": 1624,
1627
+ "Translation ita-tur": 1625,
1628
+ "Translation ita-ukr": 1626,
1629
+ "Translation jap-eng": 1627,
1630
+ "Translation jav-deu": 1628,
1631
+ "Translation jav-eng": 1629,
1632
+ "Translation jav-fra": 1630,
1633
+ "Translation jav-por": 1631,
1634
+ "Translation jav-spa": 1632,
1635
+ "Translation jpn-eng": 1633,
1636
+ "Translation jpn-fra": 1634,
1637
+ "Translation jpn-por": 1635,
1638
+ "Translation jpn-spa": 1636,
1639
+ "Translation kab-eng": 1637,
1640
+ "Translation kab-spa": 1638,
1641
+ "Translation kan-eng": 1639,
1642
+ "Translation kat-eng": 1640,
1643
+ "Translation kat-fra": 1641,
1644
+ "Translation kat-por": 1642,
1645
+ "Translation kat-spa": 1643,
1646
+ "Translation kaz-deu": 1644,
1647
+ "Translation kaz-eng": 1645,
1648
+ "Translation kaz-fra": 1646,
1649
+ "Translation kaz-por": 1647,
1650
+ "Translation kaz-spa": 1648,
1651
+ "Translation kaz_Cyrl-eng": 1649,
1652
+ "Translation kea-deu": 1650,
1653
+ "Translation kea-eng": 1651,
1654
+ "Translation kea-fra": 1652,
1655
+ "Translation kea-por": 1653,
1656
+ "Translation kea-spa": 1654,
1657
+ "Translation kik-eng": 1655,
1658
+ "Translation kik-fra": 1656,
1659
+ "Translation kin-eng": 1657,
1660
+ "Translation kin-fra": 1658,
1661
+ "Translation kin-por": 1659,
1662
+ "Translation kin-spa": 1660,
1663
+ "Translation kmr-eng": 1661,
1664
+ "Translation kmr-fra": 1662,
1665
+ "Translation kmr-por": 1663,
1666
+ "Translation kmr-spa": 1664,
1667
+ "Translation kon-eng": 1665,
1668
+ "Translation kon-fra": 1666,
1669
+ "Translation kon-por": 1667,
1670
+ "Translation kor-eng": 1668,
1671
+ "Translation kur_Latn-deu": 1669,
1672
+ "Translation kur_Latn-eng": 1670,
1673
+ "Translation lad-eng": 1671,
1674
+ "Translation lad-spa": 1672,
1675
+ "Translation lad_Latn-eng": 1673,
1676
+ "Translation lad_Latn-spa": 1674,
1677
+ "Translation lat-deu": 1675,
1678
+ "Translation lat-eng": 1676,
1679
+ "Translation lat-spa": 1677,
1680
+ "Translation lav-deu": 1678,
1681
+ "Translation lav-eng": 1679,
1682
+ "Translation lav-fra": 1680,
1683
+ "Translation lav-por": 1681,
1684
+ "Translation lav-rus": 1682,
1685
+ "Translation lav-spa": 1683,
1686
+ "Translation lfn_Latn-deu": 1684,
1687
+ "Translation lfn_Latn-eng": 1685,
1688
+ "Translation lfn_Latn-fra": 1686,
1689
+ "Translation lfn_Latn-por": 1687,
1690
+ "Translation lij-deu": 1688,
1691
+ "Translation lij-eng": 1689,
1692
+ "Translation lij-fra": 1690,
1693
+ "Translation lij-por": 1691,
1694
+ "Translation lij-spa": 1692,
1695
+ "Translation lim-deu": 1693,
1696
+ "Translation lim-eng": 1694,
1697
+ "Translation lim-fra": 1695,
1698
+ "Translation lim-nld": 1696,
1699
+ "Translation lim-por": 1697,
1700
+ "Translation lim-spa": 1698,
1701
+ "Translation lin-eng": 1699,
1702
+ "Translation lin-fra": 1700,
1703
+ "Translation lin-por": 1701,
1704
+ "Translation lin-spa": 1702,
1705
+ "Translation lit-deu": 1703,
1706
+ "Translation lit-eng": 1704,
1707
+ "Translation lit-fra": 1705,
1708
+ "Translation lit-por": 1706,
1709
+ "Translation lit-rus": 1707,
1710
+ "Translation lit-spa": 1708,
1711
+ "Translation lmo-deu": 1709,
1712
+ "Translation lmo-eng": 1710,
1713
+ "Translation lmo-fra": 1711,
1714
+ "Translation lmo-por": 1712,
1715
+ "Translation lmo-spa": 1713,
1716
+ "Translation ltz-deu": 1714,
1717
+ "Translation ltz-eng": 1715,
1718
+ "Translation ltz-fra": 1716,
1719
+ "Translation ltz-fry": 1717,
1720
+ "Translation ltz-nld": 1718,
1721
+ "Translation ltz-por": 1719,
1722
+ "Translation ltz-spa": 1720,
1723
+ "Translation lug-eng": 1721,
1724
+ "Translation lug-fra": 1722,
1725
+ "Translation lug-por": 1723,
1726
+ "Translation lug-spa": 1724,
1727
+ "Translation mag-deu": 1725,
1728
+ "Translation mag-eng": 1726,
1729
+ "Translation mag-fra": 1727,
1730
+ "Translation mag-por": 1728,
1731
+ "Translation mag-spa": 1729,
1732
+ "Translation mai-deu": 1730,
1733
+ "Translation mai-eng": 1731,
1734
+ "Translation mai-fra": 1732,
1735
+ "Translation mai-por": 1733,
1736
+ "Translation mai-spa": 1734,
1737
+ "Translation mal-eng": 1735,
1738
+ "Translation mal-fra": 1736,
1739
+ "Translation mar-deu": 1737,
1740
+ "Translation mar-eng": 1738,
1741
+ "Translation mar-fra": 1739,
1742
+ "Translation mar-por": 1740,
1743
+ "Translation mar-spa": 1741,
1744
+ "Translation mkd-deu": 1742,
1745
+ "Translation mkd-eng": 1743,
1746
+ "Translation mkd-fra": 1744,
1747
+ "Translation mkd-ita": 1745,
1748
+ "Translation mkd-por": 1746,
1749
+ "Translation mkd-ron": 1747,
1750
+ "Translation mkd-rus": 1748,
1751
+ "Translation mkd-spa": 1749,
1752
+ "Translation mkd-ukr": 1750,
1753
+ "Translation mlg-eng": 1751,
1754
+ "Translation mlg-fra": 1752,
1755
+ "Translation mlg-por": 1753,
1756
+ "Translation mlg-spa": 1754,
1757
+ "Translation mlt-deu": 1755,
1758
+ "Translation mlt-eng": 1756,
1759
+ "Translation mlt-fra": 1757,
1760
+ "Translation mlt-por": 1758,
1761
+ "Translation mlt-spa": 1759,
1762
+ "Translation mri-eng": 1760,
1763
+ "Translation mri-fra": 1761,
1764
+ "Translation mri-spa": 1762,
1765
+ "Translation msa-deu": 1763,
1766
+ "Translation msa-eng": 1764,
1767
+ "Translation msa-fra": 1765,
1768
+ "Translation msa-por": 1766,
1769
+ "Translation multi-eng": 1767,
1770
+ "Translation multi-fra": 1768,
1771
+ "Translation multi-multi": 1769,
1772
+ "Translation nde-eng": 1770,
1773
+ "Translation nde-fra": 1771,
1774
+ "Translation nde-por": 1772,
1775
+ "Translation nde-spa": 1773,
1776
+ "Translation nds-deu": 1774,
1777
+ "Translation nds-eng": 1775,
1778
+ "Translation nds-fra": 1776,
1779
+ "Translation nds-nld": 1777,
1780
+ "Translation nds-por": 1778,
1781
+ "Translation nds-spa": 1779,
1782
+ "Translation nep-deu": 1780,
1783
+ "Translation nep-eng": 1781,
1784
+ "Translation nep-fra": 1782,
1785
+ "Translation nep-por": 1783,
1786
+ "Translation nep-spa": 1784,
1787
+ "Translation nld-afr": 1785,
1788
+ "Translation nld-deu": 1786,
1789
+ "Translation nld-eng": 1787,
1790
+ "Translation nld-fra": 1788,
1791
+ "Translation nld-fry": 1789,
1792
+ "Translation nld-nds": 1790,
1793
+ "Translation nld-nld": 1791,
1794
+ "Translation nld-por": 1792,
1795
+ "Translation nld-sco": 1793,
1796
+ "Translation nld-spa": 1794,
1797
+ "Translation nno-deu": 1795,
1798
+ "Translation nno-eng": 1796,
1799
+ "Translation nno-fra": 1797,
1800
+ "Translation nno-nob": 1798,
1801
+ "Translation nno-por": 1799,
1802
+ "Translation nno-spa": 1800,
1803
+ "Translation nob-ara": 1801,
1804
+ "Translation nob-cat": 1802,
1805
+ "Translation nob-ces": 1803,
1806
+ "Translation nob-dan": 1804,
1807
+ "Translation nob-deu": 1805,
1808
+ "Translation nob-eng": 1806,
1809
+ "Translation nob-fra": 1807,
1810
+ "Translation nob-glg": 1808,
1811
+ "Translation nob-heb": 1809,
1812
+ "Translation nob-isl": 1810,
1813
+ "Translation nob-ita": 1811,
1814
+ "Translation nob-nno": 1812,
1815
+ "Translation nob-pol": 1813,
1816
+ "Translation nob-por": 1814,
1817
+ "Translation nob-ron": 1815,
1818
+ "Translation nob-rus": 1816,
1819
+ "Translation nob-spa": 1817,
1820
+ "Translation nob-swe": 1818,
1821
+ "Translation nob-tur": 1819,
1822
+ "Translation nob-ukr": 1820,
1823
+ "Translation nor-deu": 1821,
1824
+ "Translation nor-eng": 1822,
1825
+ "Translation nor-fra": 1823,
1826
+ "Translation nor-por": 1824,
1827
+ "Translation nor-spa": 1825,
1828
+ "Translation npi-deu": 1826,
1829
+ "Translation npi-eng": 1827,
1830
+ "Translation npi-fra": 1828,
1831
+ "Translation npi-por": 1829,
1832
+ "Translation npi-spa": 1830,
1833
+ "Translation nso-deu": 1831,
1834
+ "Translation nso-eng": 1832,
1835
+ "Translation nso-fra": 1833,
1836
+ "Translation nso-por": 1834,
1837
+ "Translation nso-spa": 1835,
1838
+ "Translation nya-deu": 1836,
1839
+ "Translation nya-eng": 1837,
1840
+ "Translation nya-fra": 1838,
1841
+ "Translation nya-por": 1839,
1842
+ "Translation nya-spa": 1840,
1843
+ "Translation oci-ast": 1841,
1844
+ "Translation oci-cat": 1842,
1845
+ "Translation oci-deu": 1843,
1846
+ "Translation oci-eng": 1844,
1847
+ "Translation oci-fra": 1845,
1848
+ "Translation oci-glg": 1846,
1849
+ "Translation oci-ita": 1847,
1850
+ "Translation oci-por": 1848,
1851
+ "Translation oci-ron": 1849,
1852
+ "Translation oci-spa": 1850,
1853
+ "Translation oci-tur": 1851,
1854
+ "Translation ofs-bar": 1852,
1855
+ "Translation pag-fra": 1853,
1856
+ "Translation pag-por": 1854,
1857
+ "Translation pag-spa": 1855,
1858
+ "Translation pan-deu": 1856,
1859
+ "Translation pan-eng": 1857,
1860
+ "Translation pan-fra": 1858,
1861
+ "Translation pan-por": 1859,
1862
+ "Translation pan-spa": 1860,
1863
+ "Translation pap-deu": 1861,
1864
+ "Translation pap-eng": 1862,
1865
+ "Translation pap-fra": 1863,
1866
+ "Translation pap-por": 1864,
1867
+ "Translation pap-spa": 1865,
1868
+ "Translation pdc-deu": 1866,
1869
+ "Translation pdc-eng": 1867,
1870
+ "Translation pes-deu": 1868,
1871
+ "Translation pes-eng": 1869,
1872
+ "Translation pes-fra": 1870,
1873
+ "Translation pes-por": 1871,
1874
+ "Translation pes-spa": 1872,
1875
+ "Translation plt-eng": 1873,
1876
+ "Translation plt-fra": 1874,
1877
+ "Translation plt-por": 1875,
1878
+ "Translation plt-spa": 1876,
1879
+ "Translation pms-eng": 1877,
1880
+ "Translation pms-ita": 1878,
1881
+ "Translation pol-bel": 1879,
1882
+ "Translation pol-deu": 1880,
1883
+ "Translation pol-eng": 1881,
1884
+ "Translation pol-fra": 1882,
1885
+ "Translation pol-por": 1883,
1886
+ "Translation pol-rus": 1884,
1887
+ "Translation pol-spa": 1885,
1888
+ "Translation pol-ukr": 1886,
1889
+ "Translation por-afr": 1887,
1890
+ "Translation por-ara": 1888,
1891
+ "Translation por-ast": 1889,
1892
+ "Translation por-bel": 1890,
1893
+ "Translation por-ben": 1891,
1894
+ "Translation por-bul": 1892,
1895
+ "Translation por-cat": 1893,
1896
+ "Translation por-ces": 1894,
1897
+ "Translation por-cym": 1895,
1898
+ "Translation por-dan": 1896,
1899
+ "Translation por-deu": 1897,
1900
+ "Translation por-ell": 1898,
1901
+ "Translation por-eng": 1899,
1902
+ "Translation por-est": 1900,
1903
+ "Translation por-fao": 1901,
1904
+ "Translation por-fas": 1902,
1905
+ "Translation por-fin": 1903,
1906
+ "Translation por-fra": 1904,
1907
+ "Translation por-fur": 1905,
1908
+ "Translation por-gle": 1906,
1909
+ "Translation por-glg": 1907,
1910
+ "Translation por-guj": 1908,
1911
+ "Translation por-hat": 1909,
1912
+ "Translation por-hau": 1910,
1913
+ "Translation por-heb": 1911,
1914
+ "Translation por-hin": 1912,
1915
+ "Translation por-hne": 1913,
1916
+ "Translation por-hrv": 1914,
1917
+ "Translation por-hun": 1915,
1918
+ "Translation por-isl": 1916,
1919
+ "Translation por-ita": 1917,
1920
+ "Translation por-kea": 1918,
1921
+ "Translation por-lav": 1919,
1922
+ "Translation por-lij": 1920,
1923
+ "Translation por-lin": 1921,
1924
+ "Translation por-lit": 1922,
1925
+ "Translation por-ltz": 1923,
1926
+ "Translation por-mag": 1924,
1927
+ "Translation por-mkd": 1925,
1928
+ "Translation por-mlt": 1926,
1929
+ "Translation por-nds": 1927,
1930
+ "Translation por-nep": 1928,
1931
+ "Translation por-nld": 1929,
1932
+ "Translation por-nno": 1930,
1933
+ "Translation por-nob": 1931,
1934
+ "Translation por-nor": 1932,
1935
+ "Translation por-oci": 1933,
1936
+ "Translation por-pan": 1934,
1937
+ "Translation por-pap": 1935,
1938
+ "Translation por-pes": 1936,
1939
+ "Translation por-pol": 1937,
1940
+ "Translation por-por": 1938,
1941
+ "Translation por-prs": 1939,
1942
+ "Translation por-pus": 1940,
1943
+ "Translation por-ron": 1941,
1944
+ "Translation por-rus": 1942,
1945
+ "Translation por-slk": 1943,
1946
+ "Translation por-slv": 1944,
1947
+ "Translation por-spa": 1945,
1948
+ "Translation por-sqi": 1946,
1949
+ "Translation por-srd": 1947,
1950
+ "Translation por-srp_Cyrl": 1948,
1951
+ "Translation por-swa": 1949,
1952
+ "Translation por-swe": 1950,
1953
+ "Translation por-tgk": 1951,
1954
+ "Translation por-tpi": 1952,
1955
+ "Translation por-tsn": 1953,
1956
+ "Translation por-tur": 1954,
1957
+ "Translation por-ukr": 1955,
1958
+ "Translation por-urd": 1956,
1959
+ "Translation por-vie": 1957,
1960
+ "Translation prs-deu": 1958,
1961
+ "Translation prs-eng": 1959,
1962
+ "Translation prs-fra": 1960,
1963
+ "Translation prs-por": 1961,
1964
+ "Translation prs-spa": 1962,
1965
+ "Translation pus-deu": 1963,
1966
+ "Translation pus-eng": 1964,
1967
+ "Translation pus-fra": 1965,
1968
+ "Translation pus-por": 1966,
1969
+ "Translation pus-spa": 1967,
1970
+ "Translation ron-ara": 1968,
1971
+ "Translation ron-ast": 1969,
1972
+ "Translation ron-cat": 1970,
1973
+ "Translation ron-deu": 1971,
1974
+ "Translation ron-eng": 1972,
1975
+ "Translation ron-fra": 1973,
1976
+ "Translation ron-glg": 1974,
1977
+ "Translation ron-heb": 1975,
1978
+ "Translation ron-ita": 1976,
1979
+ "Translation ron-oci": 1977,
1980
+ "Translation ron-por": 1978,
1981
+ "Translation ron-spa": 1979,
1982
+ "Translation ron-tur": 1980,
1983
+ "Translation ron-ukr": 1981,
1984
+ "Translation ru-en": 1982,
1985
+ "Translation run-deu": 1983,
1986
+ "Translation run-eng": 1984,
1987
+ "Translation run-fra": 1985,
1988
+ "Translation run-por": 1986,
1989
+ "Translation run-spa": 1987,
1990
+ "Translation rus-ast": 1988,
1991
+ "Translation rus-bel": 1989,
1992
+ "Translation rus-bul": 1990,
1993
+ "Translation rus-cat": 1991,
1994
+ "Translation rus-ces": 1992,
1995
+ "Translation rus-dan": 1993,
1996
+ "Translation rus-deu": 1994,
1997
+ "Translation rus-eng": 1995,
1998
+ "Translation rus-fin": 1996,
1999
+ "Translation rus-fra": 1997,
2000
+ "Translation rus-glg": 1998,
2001
+ "Translation rus-hbs": 1999,
2002
+ "Translation rus-hrv": 2000,
2003
+ "Translation rus-ita": 2001,
2004
+ "Translation rus-lav": 2002,
2005
+ "Translation rus-lit": 2003,
2006
+ "Translation rus-mkd": 2004,
2007
+ "Translation rus-nob": 2005,
2008
+ "Translation rus-oci": 2006,
2009
+ "Translation rus-pol": 2007,
2010
+ "Translation rus-por": 2008,
2011
+ "Translation rus-ron": 2009,
2012
+ "Translation rus-slv": 2010,
2013
+ "Translation rus-spa": 2011,
2014
+ "Translation rus-srp_Cyrl": 2012,
2015
+ "Translation rus-srp_Latn": 2013,
2016
+ "Translation rus-swe": 2014,
2017
+ "Translation rus-ukr": 2015,
2018
+ "Translation san-eng": 2016,
2019
+ "Translation scn-deu": 2017,
2020
+ "Translation scn-eng": 2018,
2021
+ "Translation scn-fra": 2019,
2022
+ "Translation scn-por": 2020,
2023
+ "Translation scn-spa": 2021,
2024
+ "Translation sco-eng": 2022,
2025
+ "Translation sco-nld": 2023,
2026
+ "Translation sin-deu": 2024,
2027
+ "Translation sin-eng": 2025,
2028
+ "Translation sin-fra": 2026,
2029
+ "Translation sin-por": 2027,
2030
+ "Translation sin-spa": 2028,
2031
+ "Translation slk-deu": 2029,
2032
+ "Translation slk-eng": 2030,
2033
+ "Translation slk-fra": 2031,
2034
+ "Translation slk-por": 2032,
2035
+ "Translation slk-spa": 2033,
2036
+ "Translation slk-ukr": 2034,
2037
+ "Translation slv-deu": 2035,
2038
+ "Translation slv-eng": 2036,
2039
+ "Translation slv-fra": 2037,
2040
+ "Translation slv-ita": 2038,
2041
+ "Translation slv-por": 2039,
2042
+ "Translation slv-ron": 2040,
2043
+ "Translation slv-rus": 2041,
2044
+ "Translation slv-spa": 2042,
2045
+ "Translation slv-ukr": 2043,
2046
+ "Translation smp-sam": 2044,
2047
+ "Translation sna-eng": 2045,
2048
+ "Translation sna-fra": 2046,
2049
+ "Translation sna-por": 2047,
2050
+ "Translation sna-spa": 2048,
2051
+ "Translation som-deu": 2049,
2052
+ "Translation som-eng": 2050,
2053
+ "Translation som-fra": 2051,
2054
+ "Translation som-por": 2052,
2055
+ "Translation som-spa": 2053,
2056
+ "Translation sot-deu": 2054,
2057
+ "Translation sot-eng": 2055,
2058
+ "Translation sot-fra": 2056,
2059
+ "Translation sot-por": 2057,
2060
+ "Translation sot-spa": 2058,
2061
+ "Translation spa-afr": 2059,
2062
+ "Translation spa-ara": 2060,
2063
+ "Translation spa-ast": 2061,
2064
+ "Translation spa-bel": 2062,
2065
+ "Translation spa-ben": 2063,
2066
+ "Translation spa-bul": 2064,
2067
+ "Translation spa-cat": 2065,
2068
+ "Translation spa-ces": 2066,
2069
+ "Translation spa-cym": 2067,
2070
+ "Translation spa-dan": 2068,
2071
+ "Translation spa-deu": 2069,
2072
+ "Translation spa-ell": 2070,
2073
+ "Translation spa-eng": 2071,
2074
+ "Translation spa-est": 2072,
2075
+ "Translation spa-eus": 2073,
2076
+ "Translation spa-fao": 2074,
2077
+ "Translation spa-fas": 2075,
2078
+ "Translation spa-fin": 2076,
2079
+ "Translation spa-fra": 2077,
2080
+ "Translation spa-fur": 2078,
2081
+ "Translation spa-gla": 2079,
2082
+ "Translation spa-gle": 2080,
2083
+ "Translation spa-glg": 2081,
2084
+ "Translation spa-hat": 2082,
2085
+ "Translation spa-hau": 2083,
2086
+ "Translation spa-hbs": 2084,
2087
+ "Translation spa-heb": 2085,
2088
+ "Translation spa-hin": 2086,
2089
+ "Translation spa-hne": 2087,
2090
+ "Translation spa-hrv": 2088,
2091
+ "Translation spa-hun": 2089,
2092
+ "Translation spa-isl": 2090,
2093
+ "Translation spa-ita": 2091,
2094
+ "Translation spa-lad": 2092,
2095
+ "Translation spa-lad_Latn": 2093,
2096
+ "Translation spa-lav": 2094,
2097
+ "Translation spa-lij": 2095,
2098
+ "Translation spa-lin": 2096,
2099
+ "Translation spa-lit": 2097,
2100
+ "Translation spa-mag": 2098,
2101
+ "Translation spa-mar": 2099,
2102
+ "Translation spa-mkd": 2100,
2103
+ "Translation spa-mlt": 2101,
2104
+ "Translation spa-nep": 2102,
2105
+ "Translation spa-nld": 2103,
2106
+ "Translation spa-nno": 2104,
2107
+ "Translation spa-nob": 2105,
2108
+ "Translation spa-nor": 2106,
2109
+ "Translation spa-oci": 2107,
2110
+ "Translation spa-pan": 2108,
2111
+ "Translation spa-pap": 2109,
2112
+ "Translation spa-pes": 2110,
2113
+ "Translation spa-pol": 2111,
2114
+ "Translation spa-por": 2112,
2115
+ "Translation spa-prs": 2113,
2116
+ "Translation spa-pus": 2114,
2117
+ "Translation spa-ron": 2115,
2118
+ "Translation spa-rus": 2116,
2119
+ "Translation spa-slk": 2117,
2120
+ "Translation spa-slv": 2118,
2121
+ "Translation spa-spa": 2119,
2122
+ "Translation spa-sqi": 2120,
2123
+ "Translation spa-srd": 2121,
2124
+ "Translation spa-srp_Cyrl": 2122,
2125
+ "Translation spa-swa": 2123,
2126
+ "Translation spa-swe": 2124,
2127
+ "Translation spa-tgk": 2125,
2128
+ "Translation spa-tpi": 2126,
2129
+ "Translation spa-tsn": 2127,
2130
+ "Translation spa-tur": 2128,
2131
+ "Translation spa-ukr": 2129,
2132
+ "Translation spa-urd": 2130,
2133
+ "Translation spa-vie": 2131,
2134
+ "Translation sqi-deu": 2132,
2135
+ "Translation sqi-eng": 2133,
2136
+ "Translation sqi-fra": 2134,
2137
+ "Translation sqi-por": 2135,
2138
+ "Translation sqi-spa": 2136,
2139
+ "Translation srd-deu": 2137,
2140
+ "Translation srd-eng": 2138,
2141
+ "Translation srd-fra": 2139,
2142
+ "Translation srd-por": 2140,
2143
+ "Translation srd-spa": 2141,
2144
+ "Translation srn-eng": 2142,
2145
+ "Translation srp_Cyrl-deu": 2143,
2146
+ "Translation srp_Cyrl-eng": 2144,
2147
+ "Translation srp_Cyrl-fra": 2145,
2148
+ "Translation srp_Cyrl-ita": 2146,
2149
+ "Translation srp_Cyrl-por": 2147,
2150
+ "Translation srp_Cyrl-ron": 2148,
2151
+ "Translation srp_Cyrl-rus": 2149,
2152
+ "Translation srp_Cyrl-spa": 2150,
2153
+ "Translation srp_Cyrl-ukr": 2151,
2154
+ "Translation srp_Latn-deu": 2152,
2155
+ "Translation srp_Latn-eng": 2153,
2156
+ "Translation srp_Latn-ita": 2154,
2157
+ "Translation srp_Latn-rus": 2155,
2158
+ "Translation srp_Latn-ukr": 2156,
2159
+ "Translation ssw-eng": 2157,
2160
+ "Translation ssw-fra": 2158,
2161
+ "Translation ssw-por": 2159,
2162
+ "Translation ssw-spa": 2160,
2163
+ "Translation stq-deu": 2161,
2164
+ "Translation stq-eng": 2162,
2165
+ "Translation stq-nld": 2163,
2166
+ "Translation swa-deu": 2164,
2167
+ "Translation swa-eng": 2165,
2168
+ "Translation swa-fra": 2166,
2169
+ "Translation swa-por": 2167,
2170
+ "Translation swa-spa": 2168,
2171
+ "Translation swe-ara": 2169,
2172
+ "Translation swe-cat": 2170,
2173
+ "Translation swe-ces": 2171,
2174
+ "Translation swe-dan": 2172,
2175
+ "Translation swe-deu": 2173,
2176
+ "Translation swe-eng": 2174,
2177
+ "Translation swe-fra": 2175,
2178
+ "Translation swe-glg": 2176,
2179
+ "Translation swe-heb": 2177,
2180
+ "Translation swe-isl": 2178,
2181
+ "Translation swe-ita": 2179,
2182
+ "Translation swe-nob": 2180,
2183
+ "Translation swe-pol": 2181,
2184
+ "Translation swe-por": 2182,
2185
+ "Translation swe-ron": 2183,
2186
+ "Translation swe-rus": 2184,
2187
+ "Translation swe-spa": 2185,
2188
+ "Translation swe-tur": 2186,
2189
+ "Translation swe-ukr": 2187,
2190
+ "Translation swg-eng": 2188,
2191
+ "Translation swg-nld": 2189,
2192
+ "Translation swh-deu": 2190,
2193
+ "Translation swh-eng": 2191,
2194
+ "Translation swh-fra": 2192,
2195
+ "Translation swh-por": 2193,
2196
+ "Translation swh-spa": 2194,
2197
+ "Translation szl-deu": 2195,
2198
+ "Translation szl-eng": 2196,
2199
+ "Translation szl-fra": 2197,
2200
+ "Translation szl-por": 2198,
2201
+ "Translation szl-spa": 2199,
2202
+ "Translation tgk-deu": 2200,
2203
+ "Translation tgk-eng": 2201,
2204
+ "Translation tgk-fra": 2202,
2205
+ "Translation tgk-por": 2203,
2206
+ "Translation tgk-spa": 2204,
2207
+ "Translation tgk_Cyrl-deu": 2205,
2208
+ "Translation tgk_Cyrl-eng": 2206,
2209
+ "Translation tgk_Cyrl-fra": 2207,
2210
+ "Translation tgk_Cyrl-por": 2208,
2211
+ "Translation tgk_Cyrl-spa": 2209,
2212
+ "Translation tha-eng": 2210,
2213
+ "Translation tir-eng": 2211,
2214
+ "Translation tir-spa": 2212,
2215
+ "Translation tpi-deu": 2213,
2216
+ "Translation tpi-eng": 2214,
2217
+ "Translation tpi-fra": 2215,
2218
+ "Translation tpi-por": 2216,
2219
+ "Translation tpi-spa": 2217,
2220
+ "Translation tsn-deu": 2218,
2221
+ "Translation tsn-eng": 2219,
2222
+ "Translation tsn-fra": 2220,
2223
+ "Translation tsn-por": 2221,
2224
+ "Translation tsn-spa": 2222,
2225
+ "Translation tso-eng": 2223,
2226
+ "Translation tso-fra": 2224,
2227
+ "Translation tso-por": 2225,
2228
+ "Translation tur-eng": 2226,
2229
+ "Translation tur-ukr": 2227,
2230
+ "Translation ukr-ast": 2228,
2231
+ "Translation ukr-bel": 2229,
2232
+ "Translation ukr-bul": 2230,
2233
+ "Translation ukr-cat": 2231,
2234
+ "Translation ukr-ces": 2232,
2235
+ "Translation ukr-dan": 2233,
2236
+ "Translation ukr-deu": 2234,
2237
+ "Translation ukr-eng": 2235,
2238
+ "Translation ukr-fin": 2236,
2239
+ "Translation ukr-fra": 2237,
2240
+ "Translation ukr-glg": 2238,
2241
+ "Translation ukr-hbs": 2239,
2242
+ "Translation ukr-hrv": 2240,
2243
+ "Translation ukr-hun": 2241,
2244
+ "Translation ukr-ita": 2242,
2245
+ "Translation ukr-lav": 2243,
2246
+ "Translation ukr-lit": 2244,
2247
+ "Translation ukr-mkd": 2245,
2248
+ "Translation ukr-nob": 2246,
2249
+ "Translation ukr-oci": 2247,
2250
+ "Translation ukr-pol": 2248,
2251
+ "Translation ukr-por": 2249,
2252
+ "Translation ukr-ron": 2250,
2253
+ "Translation ukr-rus": 2251,
2254
+ "Translation ukr-slk": 2252,
2255
+ "Translation ukr-slv": 2253,
2256
+ "Translation ukr-spa": 2254,
2257
+ "Translation ukr-srp_Cyrl": 2255,
2258
+ "Translation ukr-srp_Latn": 2256,
2259
+ "Translation ukr-swe": 2257,
2260
+ "Translation ukr-tur": 2258,
2261
+ "Translation urd-deu": 2259,
2262
+ "Translation urd-eng": 2260,
2263
+ "Translation urd-fra": 2261,
2264
+ "Translation urd-por": 2262,
2265
+ "Translation urd-spa": 2263,
2266
+ "Translation vec-deu": 2264,
2267
+ "Translation vec-eng": 2265,
2268
+ "Translation vec-fra": 2266,
2269
+ "Translation vec-por": 2267,
2270
+ "Translation vec-spa": 2268,
2271
+ "Translation ven-eng": 2269,
2272
+ "Translation ven-fra": 2270,
2273
+ "Translation ven-por": 2271,
2274
+ "Translation ven-spa": 2272,
2275
+ "Translation vie-eng": 2273,
2276
+ "Translation xho-deu": 2274,
2277
+ "Translation xho-eng": 2275,
2278
+ "Translation xho-fra": 2276,
2279
+ "Translation xho-por": 2277,
2280
+ "Translation xho-spa": 2278,
2281
+ "Translation yid-eng": 2279,
2282
+ "Translation yid-fra": 2280,
2283
+ "Translation yid-spa": 2281,
2284
+ "Translation yor-eng": 2282,
2285
+ "Translation zea-deu": 2283,
2286
+ "Translation zea-eng": 2284,
2287
+ "Translation zea-fry": 2285,
2288
+ "Translation zea-nds": 2286,
2289
+ "Translation zea-nld": 2287,
2290
+ "Translation zho-eng": 2288,
2291
+ "Translation zho-jpn": 2289,
2292
+ "Translation zul-deu": 2290,
2293
+ "Translation zul-eng": 2291,
2294
+ "Translation zul-fra": 2292,
2295
+ "Translation zul-por": 2293,
2296
+ "Translation zul-spa": 2294,
2297
+ "Triplet": 2295,
2298
+ "TriviaQA": 2296,
2299
+ "TruthfulQA": 2297,
2300
+ "TruthfulQA (MC2)": 2298,
2301
+ "TruthfulQA Generation": 2299,
2302
+ "Truthfulness": 2300,
2303
+ "Truthfulness in answers": 2301,
2304
+ "Truthfulness in question answering": 2302,
2305
+ "Turn Detection": 2303,
2306
+ "Type prediction": 2304,
2307
+ "UFD": 2305,
2308
+ "UI Element Detection": 2306,
2309
+ "UNLABELED_DEPENDENCIES": 2307,
2310
+ "Uncensored Response": 2308,
2311
+ "Unsupervised Domain Adaptation": 2309,
2312
+ "Unsupervised Instance Segmentation": 2310,
2313
+ "Unsupervised Object Segmentation": 2311,
2314
+ "Unsupervised Semantic Segmentation": 2312,
2315
+ "Urdu Speech Recognition": 2313,
2316
+ "User Feedback Classification": 2314,
2317
+ "Uzbek Language Understanding": 2315,
2318
+ "VCGBench-Diverse": 2316,
2319
+ "VLA": 2317,
2320
+ "VQAv2": 2318,
2321
+ "VSI-Bench": 2319,
2322
+ "Vehicle Re-Identification": 2320,
2323
+ "Verbalized Rebus Solving": 2321,
2324
+ "Video Captioning": 2322,
2325
+ "Video Classification": 2323,
2326
+ "Video Crime Detection": 2324,
2327
+ "Video Frame Interpolation": 2325,
2328
+ "Video Generation": 2326,
2329
+ "Video Grounding": 2327,
2330
+ "Video Instance Segmentation": 2328,
2331
+ "Video Object Segmentation": 2329,
2332
+ "Video Prediction": 2330,
2333
+ "Video Question Answering": 2331,
2334
+ "Video Reconstruction": 2332,
2335
+ "Video Retrieval": 2333,
2336
+ "Video Summarization": 2334,
2337
+ "Video Super-Resolution": 2335,
2338
+ "Video-based Generative Performance Benchmarking": 2336,
2339
+ "Video-based Generative Performance Benchmarking (Correctness of Information)": 2337,
2340
+ "VideoMME": 2338,
2341
+ "VideoMMMU": 2339,
2342
+ "Vietnamese Banking Aspect Sentiment Analysis": 2340,
2343
+ "Vietnamese Banking Text Classification": 2341,
2344
+ "Vietnamese General Sentiment Analysis": 2342,
2345
+ "Vietnamese Medical Abstractive Question Answering": 2343,
2346
+ "Vietnamese Natural Language Inference": 2344,
2347
+ "Vietnamese News Classification": 2345,
2348
+ "VilaQuAD": 2346,
2349
+ "Violence Detection": 2347,
2350
+ "ViquiQuAD": 2348,
2351
+ "Vision-Language-Action Navigation": 2349,
2352
+ "Vision-and-Language Navigation": 2350,
2353
+ "Vision-based Classification": 2351,
2354
+ "Visual Object Tracking": 2352,
2355
+ "Visual Place Recognition": 2353,
2356
+ "Visual Prompt Tuning": 2354,
2357
+ "Visual Question Answering": 2355,
2358
+ "Visual Question Answering (VQA)": 2356,
2359
+ "Visual Reasoning": 2357,
2360
+ "Visual Servoing": 2358,
2361
+ "Visual Storytelling": 2359,
2362
+ "Visual Tracking": 2360,
2363
+ "Visual math reasoning": 2361,
2364
+ "Visual question answering": 2362,
2365
+ "Visual scientific knowledge reasoning": 2363,
2366
+ "Voice Activity Detection": 2364,
2367
+ "Voice Conversion": 2365,
2368
+ "Voice Emotion Recognition": 2366,
2369
+ "Waste Classification": 2367,
2370
+ "WideSearch": 2368,
2371
+ "Wikipedia Summarization": 2369,
2372
+ "Wikitext-fr": 2370,
2373
+ "WinoG": 2371,
2374
+ "WinoGrande": 2372,
2375
+ "Winogrande": 2373,
2376
+ "Winogrande Challenge": 2374,
2377
+ "Word Sense Disambiguation": 2375,
2378
+ "Word Similarity": 2376,
2379
+ "Word prediction": 2377,
2380
+ "XQuAD-ca": 2378,
2381
+ "Yes/No Question Classification": 2379,
2382
+ "Zero Shot Classification": 2380,
2383
+ "Zero Shot Classifications": 2381,
2384
+ "Zero Shot Segmentation": 2382,
2385
+ "Zero shot Classification": 2383,
2386
+ "Zero-Shot Action Recognition": 2384,
2387
+ "Zero-Shot Baseline": 2385,
2388
+ "Zero-Shot Classification": 2386,
2389
+ "Zero-Shot Emergence Detection": 2387,
2390
+ "Zero-Shot Text Classification": 2388,
2391
+ "Zero-Shot Transfer Image Classification": 2389,
2392
+ "Zero-Shot Video Retrieval": 2390,
2393
+ "Zero-shot": 2391,
2394
+ "Zero-shot (binary)": 2392,
2395
+ "Zero-shot Classification": 2393,
2396
+ "Zero-shot Generalization": 2394,
2397
+ "Zero-shot Sentiment Classification": 2395,
2398
+ "abstractive summarization": 2396,
2399
+ "agieval": 2397,
2400
+ "answerability prediction": 2398,
2401
+ "any-to-any": 2399,
2402
+ "arc_ca_challenge": 2400,
2403
+ "arc_ca_easy": 2401,
2404
+ "arc_easy": 2402,
2405
+ "audio classification": 2403,
2406
+ "audio-classification": 2404,
2407
+ "audio-text-retrieval": 2405,
2408
+ "automatic-speech-recognition": 2406,
2409
+ "automatic-speech-translation": 2407,
2410
+ "binary-classification": 2408,
2411
+ "binary_classification": 2409,
2412
+ "catalanqa": 2410,
2413
+ "chinese-evaluation": 2411,
2414
+ "chunking": 2412,
2415
+ "classification": 2413,
2416
+ "classify nepali news": 2414,
2417
+ "clustering": 2415,
2418
+ "code": 2416,
2419
+ "code generation": 2417,
2420
+ "code-evaluation": 2418,
2421
+ "code-generation": 2419,
2422
+ "commonsense-reasoning": 2420,
2423
+ "copa_ca": 2421,
2424
+ "coreference-resolution": 2422,
2425
+ "defect-detection": 2423,
2426
+ "diamond": 2424,
2427
+ "document-image-classification": 2425,
2428
+ "entity-linking": 2426,
2429
+ "eq_bench": 2427,
2430
+ "evaluation": 2428,
2431
+ "exam": 2429,
2432
+ "fact-verification": 2430,
2433
+ "feature-extraction": 2431,
2434
+ "few-shot": 2432,
2435
+ "few-shot-ner": 2433,
2436
+ "fill-mask": 2434,
2437
+ "flores_ca": 2435,
2438
+ "formal language correction": 2436,
2439
+ "get-answer": 2437,
2440
+ "gsgsm8k": 2438,
2441
+ "gsm8k": 2439,
2442
+ "haerae": 2440,
2443
+ "humaneval": 2441,
2444
+ "image-captioning": 2442,
2445
+ "image-classification": 2443,
2446
+ "image-segmentation": 2444,
2447
+ "image-similarity": 2445,
2448
+ "image-text-retrieval": 2446,
2449
+ "image-text-to-text": 2447,
2450
+ "image-to-image": 2448,
2451
+ "image-to-text": 2449,
2452
+ "information-retrieval": 2450,
2453
+ "instance-segmentation": 2451,
2454
+ "instruction": 2452,
2455
+ "intent classification": 2453,
2456
+ "intent-classification": 2454,
2457
+ "kmmlu": 2455,
2458
+ "knowledge": 2456,
2459
+ "low-light-image-enhancement": 2457,
2460
+ "math": 2458,
2461
+ "math-evaluation": 2459,
2462
+ "mathematical-reasoning": 2460,
2463
+ "mbpp": 2461,
2464
+ "mix": 2462,
2465
+ "mmlu": 2463,
2466
+ "multi-label text-classification": 2464,
2467
+ "multi-label-classification": 2465,
2468
+ "multi-task-evaluation": 2466,
2469
+ "multi_class_classification": 2467,
2470
+ "multi_label_classification": 2468,
2471
+ "multimodal": 2469,
2472
+ "multiple-choice": 2470,
2473
+ "multiple-choice-qa": 2471,
2474
+ "multiple-choice-question-answering": 2472,
2475
+ "multiple_choice": 2473,
2476
+ "named-entity-recognition": 2474,
2477
+ "narratives": 2475,
2478
+ "natural-language-inference": 2476,
2479
+ "ner": 2477,
2480
+ "object-classification": 2478,
2481
+ "object-detection": 2479,
2482
+ "original-capability": 2480,
2483
+ "phoneme-classification": 2481,
2484
+ "preference_evaluation": 2482,
2485
+ "pretraining-evaluation": 2483,
2486
+ "question-answering": 2484,
2487
+ "reasoning": 2485,
2488
+ "regression": 2486,
2489
+ "reinforcement-learning": 2487,
2490
+ "reinforcement-learning for quadrangular mesh topological optimization": 2488,
2491
+ "retrieval": 2489,
2492
+ "robotics": 2490,
2493
+ "semantic textual similarity": 2491,
2494
+ "semantic-segmentation": 2492,
2495
+ "semantic-similarity": 2493,
2496
+ "sentence-similarity": 2494,
2497
+ "sentiment analysis": 2495,
2498
+ "sentiment-analysis": 2496,
2499
+ "sentiment-classification": 2497,
2500
+ "sequence-classification": 2498,
2501
+ "slot-filling": 2499,
2502
+ "speech-recognition": 2500,
2503
+ "speech-to-text": 2501,
2504
+ "speech-translation": 2502,
2505
+ "stem": 2503,
2506
+ "streaming-transcription-chunk-100msec": 2504,
2507
+ "streaming-transcription-chunk-200msec": 2505,
2508
+ "streaming-transcription-chunk-300msec": 2506,
2509
+ "streaming-transcription-chunk-40msec": 2507,
2510
+ "structured sentiment analysis": 2508,
2511
+ "structured-data-classification": 2509,
2512
+ "structured-information-extraction": 2510,
2513
+ "summarization": 2511,
2514
+ "symbolic music representation learning": 2512,
2515
+ "tabular-classification": 2513,
2516
+ "tabular-regression": 2514,
2517
+ "tau2-bench": 2515,
2518
+ "text generation": 2516,
2519
+ "text political leaning classification": 2517,
2520
+ "text-classfication": 2518,
2521
+ "text-classification": 2519,
2522
+ "text-generation": 2520,
2523
+ "text-prediction": 2521,
2524
+ "text-ranking": 2522,
2525
+ "text-summarization": 2523,
2526
+ "text-to-audio": 2524,
2527
+ "text-to-image": 2525,
2528
+ "text-to-speech": 2526,
2529
+ "text-to-sql": 2527,
2530
+ "text_classification": 2528,
2531
+ "token-classification": 2529,
2532
+ "tomato leaf disease detection": 2530,
2533
+ "translation": 2531,
2534
+ "translation en-me": 2532,
2535
+ "translation, speech-translation": 2533,
2536
+ "truthfulqa": 2534,
2537
+ "truthfulqa_gen": 2535,
2538
+ "video caption": 2536,
2539
+ "video detailed caption": 2537,
2540
+ "video question anwering": 2538,
2541
+ "video-captioning": 2539,
2542
+ "video-classification": 2540,
2543
+ "video-text-to-text": 2541,
2544
+ "visual-question-answering": 2542,
2545
+ "voice-conversion": 2543,
2546
+ "winogrande": 2544,
2547
+ "word-similarity": 2545,
2548
+ "zero-shot retrieval": 2546,
2549
+ "zero-shot-classification": 2547,
2550
+ "zero-shot-image-classification": 2548,
2551
+ "ΔWP regression (go / field goal / punt)": 2549,
2552
+ "Классификация текста": 2550
2553
+ }
inference_lib.py ADDED
@@ -0,0 +1,250 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Self-contained inference module for the recommendation web app.
2
+
3
+ Contains a trimmed copy of ``MLPMetric`` (and its dependencies) so HF Spaces
4
+ deployments do not need to ship the full ``module/`` package. The class layout
5
+ and parameter names match the trained checkpoint exactly, so the original
6
+ ``state_dict`` loads with ``strict=False`` and a clean diff.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import hashlib
11
+ import math
12
+ import re
13
+ from typing import Optional
14
+
15
+ import torch
16
+ import torch.nn as nn
17
+
18
+
19
class ModelNameAvgEncoder(nn.Module):
    """Encode a model name as the mean of hashed sub-token embeddings.

    A name is lower-cased and broken into tokens (the full name, the part
    after the last ``/``, and every piece between ``/ _ -`` or whitespace).
    Each token is hashed into one of ``hash_buckets`` embedding rows and the
    rows are averaged. When ``args.use_id_emb`` is truthy, a learned per-model
    ID embedding is concatenated after the name vector.
    """

    def __init__(self, args, hash_buckets: int = 10000):
        """Create the token table (and the optional ID table).

        ``args`` must expose ``token_dim``; ``num_models`` and ``model_dim``
        are only read when ``use_id_emb`` is enabled.
        """
        super().__init__()
        self.hash_buckets = hash_buckets
        self.tok_emb = nn.Embedding(self.hash_buckets, args.token_dim)
        self.use_id_emb = bool(getattr(args, "use_id_emb", False))
        if self.use_id_emb:
            # One extra row is reserved past the known models.
            self.id_emb = nn.Embedding(args.num_models + 1, args.model_dim)
            self.unk_model_id = args.num_models

    @staticmethod
    def _split(name: str):
        """Return the de-duplicated, order-preserving token list for ``name``.

        Empty / ``None`` / whitespace-only names yield an empty list.
        """
        normalized = (name or "").strip().lower()
        if not normalized:
            return []
        candidates = [normalized]
        if "/" in normalized:
            candidates.append(normalized.split("/")[-1])
        candidates += [piece for piece in re.split(r"[\/_\-\s]+", normalized) if piece]
        # dict.fromkeys keeps the first occurrence of every token, in order.
        return list(dict.fromkeys(candidates))

    def _hash(self, tok: str):
        """Map a token to a stable bucket index in ``[0, hash_buckets)``."""
        digest = hashlib.md5(tok.encode()).hexdigest()
        return int(digest, 16) % self.hash_buckets

    def forward(self, model_ids: torch.LongTensor, model_names: list[str]):
        """Return one feature row per entry of ``model_names``.

        ``model_ids`` is only consulted when the ID embedding is enabled.
        Names that produce no tokens are encoded as an all-zero name vector.
        """
        device = self.tok_emb.weight.device
        pooled = []
        for name in model_names:
            tokens = self._split(name)
            if tokens:
                bucket_ids = torch.tensor(
                    [self._hash(t) for t in tokens], device=device, dtype=torch.long
                )
                pooled.append(self.tok_emb(bucket_ids).mean(dim=0))
            else:
                pooled.append(torch.zeros(self.tok_emb.embedding_dim, device=device))
        h_name = torch.stack(pooled, dim=0)
        if not self.use_id_emb:
            return torch.cat([h_name], dim=-1)
        return torch.cat([h_name, self.id_emb(model_ids.to(device))], dim=-1)
66
+
67
+
68
class MLPMetric(nn.Module):
    """MLP recommender that ranks candidate models for a dataset.

    Takes a raw dataset-description embedding plus task / metric / size /
    (optional) family side features and scores every candidate model.

    Mirrors the checkpoint at
    ``checkpoint/mlp/unified_augmented/ablation_no_model_id_no_dataset_id``.
    Attribute names and layer layout are kept as-is so the saved
    ``state_dict`` keys continue to match.
    """

    def __init__(self, args):
        """Build all sub-modules from the training hyper-parameters in ``args``.

        Required attributes: ``num_tasks``, ``task_dim``, ``num_size_buckets``,
        ``size_dim``, ``token_dim``, ``model_dim``, ``dataset_desp_dim``,
        ``hidden_dim``, ``dropout_rate``. Everything read through ``getattr``
        below is optional and falls back to the default shown there.
        """
        super().__init__()
        self.use_id_emb = bool(getattr(args, "use_id_emb", False))
        if self.use_id_emb:
            self.model_embedding = nn.Embedding(args.num_models, args.model_dim)
        else:
            self.model_embedding = None

        self.task_embedding = nn.Embedding(args.num_tasks, args.task_dim)
        self.model_info_encoder = ModelNameAvgEncoder(args)
        self.size_embedding = nn.Embedding(args.num_size_buckets, args.size_dim)
        self.num_size_buckets = int(args.num_size_buckets)
        self.use_size_prior = bool(getattr(args, "use_size_prior", True))

        self.use_family_prior = bool(getattr(args, "use_family_prior", False))
        if self.use_family_prior:
            family_dim = int(getattr(args, "family_dim", args.size_dim))
            self.family_embedding = nn.Embedding(args.num_families, family_dim)
            self.family_dim = family_dim
        else:
            self.family_dim = 0

        # Disable Model-Spider fusion path entirely (not used by this checkpoint).
        self.use_ms_spider_repr = False
        self.ms_fusion_dim = 0

        model_info_dim = args.token_dim + (args.model_dim if self.use_id_emb else 0)
        dataset_info_dim = args.dataset_desp_dim + args.task_dim
        backbone_in_dim = (
            model_info_dim + dataset_info_dim + args.size_dim + self.family_dim + self.ms_fusion_dim
        )

        # Backbone is rebuilt by the metric branch below; the base layers are kept here
        # to match the parameter naming of the saved state dict.
        self.backbone = nn.Sequential(
            nn.Linear(backbone_in_dim, args.hidden_dim),
            nn.ReLU(),
            nn.Dropout(args.dropout_rate),
            nn.Linear(args.hidden_dim, args.hidden_dim),
            nn.ReLU(),
            nn.Dropout(args.dropout_rate),
        )
        self.pairwise_head = nn.Linear(args.hidden_dim, 1)
        self.pointwise_head = nn.Linear(args.hidden_dim, 1)

        prior_in_dim = args.size_dim + self.family_dim
        self.prior_head = nn.Sequential(
            nn.Linear(prior_in_dim, args.hidden_dim // 2),
            nn.ReLU(),
            nn.Linear(args.hidden_dim // 2, 1),
        )
        self.temperature = nn.Parameter(torch.tensor(1.0))

        # ---- metric extension (matches the MLPMetric subclass) ----
        self.use_metric_embedding = bool(getattr(args, "use_metric_feature", True))
        self.num_metrics = int(getattr(args, "num_metrics", 1))
        self.metric_dim = int(getattr(args, "metric_dim", args.task_dim))
        self.unknown_metric_id = int(getattr(args, "unknown_metric_id", 0))
        if self.use_metric_embedding:
            self.metric_embedding = nn.Embedding(max(self.num_metrics, 1), self.metric_dim)
            # Widen the first layer by metric_dim; everything else is copied
            # from the base backbone built above.
            in_features = self.backbone[0].in_features + self.metric_dim
            hidden = self.backbone[0].out_features
            dropout = self.backbone[2].p
            self.backbone = nn.Sequential(
                nn.Linear(in_features, hidden),
                nn.ReLU(),
                nn.Dropout(dropout),
                nn.Linear(hidden, hidden),
                nn.ReLU(),
                nn.Dropout(dropout),
            )
        else:
            self.metric_embedding = None

    def encode_model(self, model_ids: torch.LongTensor, model_names: list[str]) -> torch.Tensor:
        """Encode candidate models through the name (and optional ID) encoder."""
        return self.model_info_encoder(model_ids, model_names)

    @torch.no_grad()
    def build_model_cache(
        self,
        all_model_names: list[str],
        all_model_size_ids: torch.LongTensor,
        all_model_family_ids: Optional[torch.LongTensor] = None,
        device=None,
    ):
        """Pre-compute the query-independent features of every candidate model.

        Args:
            all_model_names: one name per candidate.
            all_model_size_ids: size-bucket id per candidate (same length).
            all_model_family_ids: family id per candidate; required when the
                model was built with ``use_family_prior``.
            device: target device; defaults to the device of the parameters.

        Returns:
            dict with ``h_model``, ``h_size``, ``size_ids``, ``h_family`` and
            ``family_ids`` (the last two are ``None`` without a family prior).

        Raises:
            ValueError: if the per-model inputs disagree in length, or if the
                family prior is enabled but no family ids were supplied.
        """
        M = len(all_model_names)
        # Explicit checks instead of ``assert`` so they survive ``python -O``.
        if all_model_size_ids.shape[0] != M:
            raise ValueError(
                f"all_model_size_ids has {all_model_size_ids.shape[0]} entries "
                f"but {M} model names were given."
            )
        if self.use_family_prior:
            # Without family features the backbone / prior inputs would be too
            # narrow and fail later with an opaque shape error.
            if all_model_family_ids is None:
                raise ValueError(
                    "all_model_family_ids is required when use_family_prior is enabled."
                )
            if all_model_family_ids.shape[0] != M:
                raise ValueError(
                    f"all_model_family_ids has {all_model_family_ids.shape[0]} entries "
                    f"but {M} model names were given."
                )

        if device is None:
            device = next(self.parameters()).device
        size_ids = all_model_size_ids.to(device=device, dtype=torch.long)
        # Positional ids; only used by the encoder when the ID embedding is on.
        model_ids = torch.arange(M, device=device, dtype=torch.long)

        h_model = self.encode_model(model_ids, all_model_names)
        h_size = self.size_embedding(size_ids)
        cache = {"h_model": h_model, "h_size": h_size, "size_ids": size_ids}
        if self.use_family_prior and all_model_family_ids is not None:
            family_ids = all_model_family_ids.to(device=device, dtype=torch.long)
            cache["h_family"] = self.family_embedding(family_ids)
            cache["family_ids"] = family_ids
        else:
            cache["h_family"] = None
            cache["family_ids"] = None
        return cache

    def _metric_embed(
        self, metric_ids: Optional[torch.LongTensor], batch_size: int, device
    ) -> Optional[torch.Tensor]:
        """Return metric embeddings, or ``None`` when the metric feature is off.

        A missing ``metric_ids`` is replaced by ``unknown_metric_id`` for every
        row of the batch.
        """
        if not self.use_metric_embedding or self.metric_embedding is None:
            return None
        if metric_ids is None:
            metric_ids = torch.full(
                (batch_size,), int(self.unknown_metric_id), dtype=torch.long, device=device
            )
        return self.metric_embedding(metric_ids)

    @torch.no_grad()
    def score_matrix(
        self,
        task_ids: torch.LongTensor,
        dataset_desp_batch: torch.Tensor,
        model_cache: dict,
        metric_ids: Optional[torch.LongTensor] = None,
        chunk_size: int = 8192,
    ) -> torch.Tensor:
        """Score every cached candidate model for every query in the batch.

        Args:
            task_ids: task id per query, shape ``(B,)``.
            dataset_desp_batch: description embeddings, shape ``(B, D)``.
            model_cache: output of :meth:`build_model_cache`.
            metric_ids: metric id per query; ``None`` uses the unknown metric.
            chunk_size: number of candidates pushed through the backbone at
                once (bounds the ``B * chunk_size`` intermediate).

        Returns:
            ``(B, M)`` tensor of ``(pairwise score + prior) / temperature``.

        Raises:
            ValueError: if ``chunk_size`` is smaller than 1.
        """
        if chunk_size < 1:
            # A non-positive step would never advance through the candidates.
            raise ValueError(f"chunk_size must be >= 1, got {chunk_size}.")

        device = dataset_desp_batch.device
        B = dataset_desp_batch.size(0)

        h_task = self.task_embedding(task_ids)
        h_data = dataset_desp_batch
        h_metric = self._metric_embed(metric_ids, B, device)

        h_model_all = model_cache["h_model"]
        h_size_all = model_cache["h_size"]
        h_family_all = model_cache.get("h_family")
        M = h_model_all.size(0)

        # The prior depends only on the candidate, so compute it once for all.
        if self.use_size_prior or self.use_family_prior:
            if h_family_all is not None:
                prior_inp_all = torch.cat([h_size_all, h_family_all], dim=-1)
            else:
                prior_inp_all = h_size_all
            prior_all = self.prior_head(prior_inp_all).squeeze(-1)
        else:
            prior_all = torch.zeros(M, device=device)

        out = torch.empty(B, M, device=device)
        # Guard against a degenerate (near-zero / negative) learned temperature.
        T = torch.clamp(self.temperature, min=1e-3)

        for start in range(0, M, chunk_size):
            end = min(start + chunk_size, M)
            m = end - start

            # Broadcast candidate features over the batch and query features
            # over the candidates. The concat order below must match training:
            # model, dataset, size, [family], task, [metric].
            parts = [
                h_model_all[start:end].unsqueeze(0).expand(B, m, -1),
                h_data.unsqueeze(1).expand(B, m, -1),
                h_size_all[start:end].unsqueeze(0).expand(B, m, -1),
            ]
            if h_family_all is not None:
                parts.append(h_family_all[start:end].unsqueeze(0).expand(B, m, -1))
            parts.append(h_task.unsqueeze(1).expand(B, m, -1))
            if h_metric is not None:
                parts.append(h_metric.unsqueeze(1).expand(B, m, -1))
            residual_inp = torch.cat(parts, dim=-1)

            h = self.backbone(residual_inp.reshape(B * m, -1))
            s_chunk = self.pairwise_head(h).reshape(B, m)
            prior_chunk = prior_all[start:end].unsqueeze(0)
            out[:, start:end] = (s_chunk + prior_chunk) / T
        return out
recommend.py ADDED
@@ -0,0 +1,409 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Recommendation engine that loads the trained MLPMetric checkpoint plus the
2
+ pre-built model pool, and exposes ``Recommender.recommend`` for the Gradio app.
3
+ """
4
+ from __future__ import annotations
5
+
6
+ import json
7
+ import os
8
+ import re
9
+ import threading
10
+ from dataclasses import dataclass
11
+ from types import SimpleNamespace
12
+ from typing import List, Optional
13
+
14
+ import numpy as np
15
+ import torch
16
+
17
+ from inference_lib import MLPMetric
18
+
19
+
20
+ EMBEDDING_MODEL = "text-embedding-3-small" # Must match what was used during training.
21
+ EMBEDDING_DIM = 1536
22
+
23
+
24
+ # Official foundation-lab HuggingFace orgs (lowercase). Names whose owner falls
25
+ # in this set are considered "official pretrained" releases (Llama, Qwen,
26
+ # DeepSeek, Phi, Gemma, Mistral, Falcon, BLOOM, OLMo, Whisper, CLIP, ViT, ...).
27
+ OFFICIAL_ORGS: set[str] = {
28
+ # Modern LLMs
29
+ "deepseek-ai", "qwen", "openai", "meta-llama", "mistralai",
30
+ "google", "microsoft", "01-ai", "tiiuae", "stabilityai",
31
+ "nvidia", "ibm-granite", "eleutherai", "bigscience",
32
+ "allenai", "salesforce", "apple", "xai-org",
33
+ # Multimodal / CV / audio
34
+ "facebook", "naver-clova-ix",
35
+ # Encoders / retrieval
36
+ "sentence-transformers", "baai", "jinaai", "intfloat",
37
+ }
38
+
39
+ # Classic bare-name pretrained releases (no org prefix on HF) that we still
40
+ # count as "official" — e.g. the original Google BERT/T5, Facebook RoBERTa.
41
+ OFFICIAL_BARE_NAMES: set[str] = {
42
+ "bert-base-uncased", "bert-large-uncased",
43
+ "roberta-base", "roberta-large",
44
+ "gpt2", "gpt2-medium", "gpt2-large", "gpt2-xl",
45
+ "t5-base", "t5-large", "t5-3b", "t5-11b",
46
+ "distilbert-base-uncased", "albert-base-v2",
47
+ "xlm-roberta-base", "xlm-roberta-large",
48
+ }
49
+
50
+
51
+ def _is_official_name(name: str) -> bool:
52
+ n = name.strip()
53
+ if "/" in n:
54
+ return n.split("/", 1)[0].lower() in OFFICIAL_ORGS
55
+ return n.lower() in OFFICIAL_BARE_NAMES
56
+
57
+
58
+ def _slug(s: str) -> str:
59
+ return re.sub(r"[^a-z0-9]+", "", str(s).strip().lower())
60
+
61
+
62
+ def _build_alias_map(name2id: dict[str, int]) -> dict[str, int]:
63
+ """Loose lookup: lowercased, also a slugged form, also strip composite markers."""
64
+ out: dict[str, int] = {}
65
+ for k, v in name2id.items():
66
+ for alias in {k, k.strip().lower(), _slug(k)}:
67
+ if alias and alias not in out:
68
+ out[alias] = v
69
+ # composite metric keys like "task::metric" — also store the suffix
70
+ if "::" in k:
71
+ tail = k.split("::", 1)[1]
72
+ for alias in {tail, tail.strip().lower(), _slug(tail)}:
73
+ if alias and alias not in out:
74
+ out[alias] = v
75
+ return out
76
+
77
+
78
@dataclass
class Recommendation:
    """One ranked candidate returned by ``Recommender.recommend``."""

    rank: int  # 1-based position in the returned list
    model_name: str
    score: float  # raw model score, before any popularity blending
    size_bucket: int
    size_b: float  # raw size in billions of params; NaN if unknown
    family_id: int
    popularity: int
    hf_url: str  # empty for candidates without a HuggingFace URL


class Recommender:
    """Loads the checkpoint, model pool, and ID maps; exposes ``recommend``."""

    def __init__(
        self,
        checkpoint_path: str,
        args_path: str,
        data_dir: str,
        pool_path: str,
        device: str = "cpu",
    ):
        """Load every artifact and pre-compute the model-side feature cache.

        Args:
            checkpoint_path: ``torch.save``-d state dict (optionally wrapped
                in a dict under the ``"model"`` key).
            args_path: JSON file with the training hyper-parameters.
            data_dir: directory holding ``task2id.json`` / ``metric2id.json``.
            pool_path: ``.npz`` candidate pool (names, size ids, family ids,
                popularities, urls, optionally ``sizes_b``).
            device: torch device string used for inference.
        """
        self.device = torch.device(device)

        # The vocab files contain non-ASCII labels, so never rely on the
        # platform's default text encoding when reading them.
        with open(args_path, encoding="utf-8") as f:
            self._train_args = json.load(f)
        with open(os.path.join(data_dir, "task2id.json"), encoding="utf-8") as f:
            self.task2id: dict[str, int] = json.load(f)
        with open(os.path.join(data_dir, "metric2id.json"), encoding="utf-8") as f:
            metric2id_raw: dict[str, int] = json.load(f)
        # The training-time metric vocab is the raw composite keys; expose both
        # the raw form and a lowercased / slugged alias for lookup.
        self.metric2id = metric2id_raw
        self.task_alias = _build_alias_map(self.task2id)
        self.metric_alias = _build_alias_map(self.metric2id)

        # NOTE(security): allow_pickle=True unpickles object arrays, which can
        # execute arbitrary code. Only point POOL_PATH at trusted files.
        pool = np.load(pool_path, allow_pickle=True)
        self.model_names: list[str] = list(pool["names"].tolist())
        self.size_ids = torch.tensor(pool["size_ids"], dtype=torch.long)
        # Backwards compatible: older pools won't have sizes_b. Default to NaN.
        if "sizes_b" in pool.files:
            self.sizes_b: np.ndarray = pool["sizes_b"].astype(np.float32)
        else:
            self.sizes_b = np.full(len(self.model_names), np.nan, dtype=np.float32)
        self.family_ids = torch.tensor(pool["family_ids"], dtype=torch.long)
        self.popularities: np.ndarray = pool["popularities"]
        self.urls: list[str] = list(pool["urls"].tolist())

        # Precompute the static per-candidate masks once — names and URLs do
        # not change after load.
        self.is_official: np.ndarray = np.array(
            [_is_official_name(n) for n in self.model_names], dtype=bool
        )
        self.has_url: np.ndarray = np.array([bool(u) for u in self.urls], dtype=bool)

        # Build the MLPMetric model with the same hyper-parameters used for training.
        cfg = self._train_args
        model_args = SimpleNamespace(
            num_models=cfg.get("num_models", len(self.model_names)),
            num_tasks=cfg.get("num_tasks"),
            num_metrics=cfg.get("num_metrics"),
            num_size_buckets=cfg.get("num_size_buckets"),
            num_families=cfg.get("num_families"),
            token_dim=cfg["token_dim"],
            model_dim=cfg["model_dim"],
            task_dim=cfg["task_dim"],
            metric_dim=cfg.get("metric_dim", cfg["task_dim"]),
            size_dim=cfg["size_dim"],
            family_dim=cfg.get("family_dim", cfg["size_dim"]),
            dataset_desp_dim=cfg["dataset_desp_dim"],
            hidden_dim=cfg["hidden_dim"],
            dropout_rate=cfg.get("dropout_rate", 0.0),
            use_id_emb=bool(cfg.get("use_id_emb", False)),
            use_size_prior=bool(cfg.get("use_size_prior", True)),
            use_family_prior=bool(cfg.get("use_family_prior", False)),
            use_metric_feature=bool(cfg.get("use_metric_feature", True)),
            unknown_metric_id=int(cfg.get("unknown_metric_id", 0)),
        )
        self.model = MLPMetric(model_args).to(self.device).eval()

        # NOTE(security): torch.load unpickles the file; only load checkpoints
        # from a trusted source.
        raw = torch.load(checkpoint_path, map_location="cpu")
        state = raw.get("model", raw) if isinstance(raw, dict) else raw
        missing, unexpected = self.model.load_state_dict(state, strict=False)
        if missing or unexpected:
            print(f"[Recommender] loaded with missing={len(missing)} unexpected={len(unexpected)}")
            if missing:
                print("  e.g. missing:", missing[:3])
            if unexpected:
                print("  e.g. unexpected:", unexpected[:3])

        # Pre-compute the model-side cache once. Running the token encoder over
        # 47k names is the slowest single step; we amortize it to startup.
        self._cache_lock = threading.Lock()
        with torch.no_grad():
            self.model_cache = self.model.build_model_cache(
                self.model_names,
                self.size_ids,
                all_model_family_ids=self.family_ids if self.model.use_family_prior else None,
                device=self.device,
            )

        # OpenAI client is created lazily so the import is only required when used.
        self._oai_client = None

    # ------------------------------------------------------------------ embedding

    def _make_openai_client(self, api_key: Optional[str] = None):
        """Return an OpenAI client for ``api_key`` (or the env-configured one)."""
        from openai import OpenAI  # noqa: WPS433
        # When the caller supplies a key (e.g. from the Gradio UI), build a
        # fresh client and do NOT cache it — different users send different
        # keys, and we don't want one user's key to be reused for the next.
        if api_key:
            return OpenAI(api_key=api_key)
        # Fallback for local dev: rely on OPENAI_API_KEY in the environment.
        if self._oai_client is None:
            self._oai_client = OpenAI()
        return self._oai_client

    def embed_description(self, text: str, api_key: Optional[str] = None) -> np.ndarray:
        """Embed a dataset description with ``EMBEDDING_MODEL``.

        Returns:
            float32 vector of length ``EMBEDDING_DIM``.

        Raises:
            ValueError: empty description, client creation failure, or a
                failed embedding request (original error is chained).
            RuntimeError: the API returned a vector of unexpected size.
        """
        text = (text or "").strip()
        if not text:
            raise ValueError("Dataset description must be non-empty.")
        try:
            client = self._make_openai_client(api_key)
        except Exception as e:  # missing OPENAI_API_KEY in dev, etc.
            raise ValueError(
                "OpenAI client could not be created. Paste an API key into "
                "the 'OpenAI API key' field above. Original error: " + str(e)
            ) from e
        try:
            resp = client.embeddings.create(model=EMBEDDING_MODEL, input=text)
        except Exception as e:
            # Surface auth / quota errors back to the user verbatim — they're
            # the ones who need to fix it.
            raise ValueError(f"OpenAI embedding call failed: {e}") from e
        vec = np.asarray(resp.data[0].embedding, dtype=np.float32)
        if vec.shape[-1] != EMBEDDING_DIM:
            raise RuntimeError(
                f"Expected {EMBEDDING_DIM}-dim embedding, got {vec.shape[-1]}. "
                f"Make sure the API key has access to {EMBEDDING_MODEL}."
            )
        return vec

    # ------------------------------------------------------------------ lookups

    def resolve_task(self, task: str) -> int:
        """Map a task label to its id, trying exact, lower-cased, then slug form.

        Raises:
            ValueError: ``task`` is ``None`` or matches no known label.
        """
        if task is None:
            raise ValueError("Task must be provided.")
        for cand in (task, task.strip().lower(), _slug(task)):
            if cand in self.task_alias:
                return self.task_alias[cand]
        raise ValueError(
            f"Unknown task '{task}'. Pick one from the dropdown — the model has only seen {len(self.task2id)} task labels."
        )

    def resolve_metric(self, metric: str) -> int:
        """Map a metric label to its id; blank or unknown labels fall back to
        the model's ``unknown_metric_id`` instead of raising."""
        if metric is None or not str(metric).strip():
            return int(self.model.unknown_metric_id)
        for cand in (metric, metric.strip().lower(), _slug(metric)):
            if cand in self.metric_alias:
                return self.metric_alias[cand]
        # Fallback: unknown metric token.
        return int(self.model.unknown_metric_id)

    # ------------------------------------------------------------------ main API

    def recommend(
        self,
        dataset_description: str,
        task: str,
        metric: Optional[str] = None,
        top_k: int = 20,
        popularity_weight: float = 0.0,
        hf_only: bool = True,
        min_size_b: Optional[float] = None,
        max_size_b: Optional[float] = None,
        official_only: bool = False,
        api_key: Optional[str] = None,
    ) -> List[Recommendation]:
        """Score all candidate models and return the top-k.

        ``popularity_weight`` (0..1) blends a log(downloads) signal into the
        ranking, useful when several models have near-tied scores. Default 0
        means "pure model output".

        ``hf_only`` (default True) drops candidates whose model name is not a
        HuggingFace repo id (those are paper baselines like ``inceptionv4``
        that the user cannot download with ``hf hub``).

        ``min_size_b`` / ``max_size_b`` (optional, in B params) restrict
        results to candidates whose raw parameter count falls in the range.
        ``None`` (or 0 from the UI) means "no limit". Models with unknown
        size are excluded once any size bound is set.

        ``official_only`` (default False) restricts to a curated whitelist of
        foundation-lab orgs (DeepSeek, Qwen, Llama, gpt-oss, Mistral, ...).

        ``api_key`` (optional) — OpenAI API key supplied by the caller (e.g.
        from a Gradio textbox). When given, used for this single request only;
        otherwise the recommender falls back to ``OPENAI_API_KEY`` in env.

        Fewer than ``top_k`` results (possibly none) are returned when the
        filters leave fewer eligible candidates.
        """
        task_id = self.resolve_task(task)
        metric_id = self.resolve_metric(metric)
        emb = self.embed_description(dataset_description, api_key=api_key)
        return self._score(
            emb, task_id, metric_id, top_k, popularity_weight, hf_only,
            min_size_b=min_size_b, max_size_b=max_size_b,
            official_only=official_only,
        )

    @torch.no_grad()
    def _score(
        self,
        desp_emb: np.ndarray,
        task_id: int,
        metric_id: int,
        top_k: int,
        popularity_weight: float,
        hf_only: bool = True,
        min_size_b: Optional[float] = None,
        max_size_b: Optional[float] = None,
        official_only: bool = False,
    ) -> List[Recommendation]:
        """Rank the pool for one already-embedded query.

        Filters (``hf_only``, size bounds, ``official_only``) remove
        candidates entirely: a filtered-out model is never returned, even when
        fewer than ``top_k`` candidates remain. The returned ``score`` is the
        raw model output; ``popularity_weight`` only influences the ordering.
        """
        device = self.device
        task_t = torch.tensor([task_id], dtype=torch.long, device=device)
        metric_t = torch.tensor([metric_id], dtype=torch.long, device=device)
        desp_t = torch.tensor(desp_emb, dtype=torch.float32, device=device).unsqueeze(0)

        with self._cache_lock:
            scores = self.model.score_matrix(
                task_t, desp_t, self.model_cache, metric_ids=metric_t
            ).squeeze(0)
        scores_np = scores.detach().cpu().numpy().astype(np.float32)

        # ---- eligibility mask -------------------------------------------
        eligible = np.ones(scores_np.shape[0], dtype=bool)

        # Drop non-HF candidates (no URL to link to).
        if hf_only:
            eligible &= self.has_url

        # Manual size bounds (B params). Convention from the UI: 0 / None
        # means "no limit". Models with unknown size are dropped once any
        # bound is set.
        has_min = min_size_b not in (None, 0)
        has_max = max_size_b not in (None, 0)
        if has_min or has_max:
            sizes = self.sizes_b
            in_range = ~np.isnan(sizes)
            if has_min:
                in_range &= sizes >= float(min_size_b)
            if has_max:
                in_range &= sizes <= float(max_size_b)
            eligible &= in_range

        # Keep only flagship checkpoints when requested.
        if official_only:
            eligible &= self.is_official

        candidates = np.flatnonzero(eligible)
        if candidates.size == 0:
            return []

        # ---- ranking signal ---------------------------------------------
        if popularity_weight > 0.0:
            pop = np.log1p(self.popularities.astype(np.float32))
            if pop.max() > 0:
                pop = pop / pop.max()
            # Re-center scores then add the popularity nudge.
            s_norm = scores_np - scores_np.mean()
            if s_norm.std() > 1e-6:
                s_norm = s_norm / s_norm.std()
            ranking_scores = s_norm + popularity_weight * pop
        else:
            ranking_scores = scores_np

        # Select among eligible candidates only, so a masked model can never
        # be used to pad the result up to top_k.
        k = min(max(1, int(top_k)), candidates.size)
        cand_scores = ranking_scores[candidates]
        pick = np.argpartition(-cand_scores, k - 1)[:k]
        pick = pick[np.argsort(-cand_scores[pick])]
        top_idx = candidates[pick]

        out: list[Recommendation] = []
        for rank, raw_i in enumerate(top_idx, start=1):
            i = int(raw_i)
            out.append(
                Recommendation(
                    rank=rank,
                    model_name=self.model_names[i],
                    score=float(scores_np[i]),
                    size_bucket=int(self.size_ids[i]),
                    size_b=float(self.sizes_b[i]),
                    family_id=int(self.family_ids[i]),
                    popularity=int(self.popularities[i]),
                    hf_url=self.urls[i],
                )
            )
        return out
364
+
365
+
366
def default_recommender() -> Recommender:
    """Build a ``Recommender`` from the conventional file locations.

    Each of checkpoint / args / data dir is resolved in this order:
      1. Environment variable (``MODEL_CKPT``, ``MODEL_ARGS``, ``DATA_DIR``).
      2. Self-contained Spaces layout next to this file
         (``checkpoint/`` and ``data/``), when the path exists.
      3. Original project tree one directory up (development mode).

    The pool comes from ``POOL_PATH`` or ``assets/model_pool.npz`` next to this
    file; the device from ``DEVICE`` (default ``cpu``).
    """
    web_dir = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.dirname(web_dir)

    def _resolve(env_key: str, bundled: str, dev_tree: str) -> str:
        # An explicit (non-empty) override always wins.
        override = os.environ.get(env_key)
        if override:
            return override
        if os.path.exists(bundled):
            return bundled
        return dev_tree

    checkpoint_path = _resolve(
        "MODEL_CKPT",
        os.path.join(web_dir, "checkpoint/MLPMetric.pt"),
        os.path.join(project_root, "checkpoint/mlp/unified_augmented/ablation_no_model_id_no_dataset_id/MLPMetric.pt"),
    )
    args_path = _resolve(
        "MODEL_ARGS",
        os.path.join(web_dir, "checkpoint/args.json"),
        os.path.join(project_root, "checkpoint/mlp/unified_augmented/ablation_no_model_id_no_dataset_id/args.json"),
    )
    data_dir = _resolve(
        "DATA_DIR",
        os.path.join(web_dir, "data"),
        os.path.join(project_root, "data/unified_augmented"),
    )
    pool_path = os.environ.get("POOL_PATH", os.path.join(web_dir, "assets/model_pool.npz"))
    device = os.environ.get("DEVICE", "cpu")

    return Recommender(
        checkpoint_path=checkpoint_path,
        args_path=args_path,
        data_dir=data_dir,
        pool_path=pool_path,
        device=device,
    )
398
+
399
+
400
if __name__ == "__main__":
    # Offline smoke test: load everything, then rank the pool for the first
    # known task using a random vector in place of a real OpenAI embedding.
    recommender = default_recommender()
    print(f"Loaded {len(recommender.model_names)} candidate models, "
          f"{len(recommender.task2id)} tasks, {len(recommender.metric2id)} metrics.")
    sample_task = next(iter(recommender.task2id))
    print(f"\nSmoke test: ranking for task={sample_task!r}")
    random_embedding = np.random.randn(EMBEDDING_DIM).astype(np.float32)
    results = recommender._score(
        random_embedding,
        recommender.task2id[sample_task],
        recommender.model.unknown_metric_id,
        5,
        0.0,
    )
    for r in results:
        print(f" #{r.rank} {r.model_name:<60} score={r.score:+.4f} pop={r.popularity}")
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ torch>=2.1.0,<2.6
2
+ numpy>=1.24,<2.0
3
+ pandas>=2.0,<2.4
4
+ gradio==4.44.0
5
+ gradio-client==1.3.0
6
+ huggingface_hub>=0.24,<0.26
7
+ openai>=1.40