j-chim committed on
Commit
4feff22
·
verified ·
1 Parent(s): 2977356

Upload folder using huggingface_hub

Browse files
packages/eval-entity-resolver/src/eval_entity_resolver/__init__.py CHANGED
@@ -1,5 +1,6 @@
1
  from eval_entity_resolver.alias_store import AliasStore
2
  from eval_entity_resolver.canonical_store import CanonicalStore
 
3
  from eval_entity_resolver.eee import clean_eval_name, extract_metric
4
  from eval_entity_resolver.models import ResolutionResult, ResolverConfig
5
  from eval_entity_resolver.resolver import Resolver
@@ -12,4 +13,5 @@ __all__ = [
12
  "ResolutionResult",
13
  "clean_eval_name",
14
  "extract_metric",
 
15
  ]
 
1
  from eval_entity_resolver.alias_store import AliasStore
2
  from eval_entity_resolver.canonical_store import CanonicalStore
3
+ from eval_entity_resolver.display import humanize_model_slug
4
  from eval_entity_resolver.eee import clean_eval_name, extract_metric
5
  from eval_entity_resolver.models import ResolutionResult, ResolverConfig
6
  from eval_entity_resolver.resolver import Resolver
 
13
  "ResolutionResult",
14
  "clean_eval_name",
15
  "extract_metric",
16
+ "humanize_model_slug",
17
  ]
packages/eval-entity-resolver/src/eval_entity_resolver/__pycache__/__init__.cpython-311.pyc CHANGED
Binary files a/packages/eval-entity-resolver/src/eval_entity_resolver/__pycache__/__init__.cpython-311.pyc and b/packages/eval-entity-resolver/src/eval_entity_resolver/__pycache__/__init__.cpython-311.pyc differ
 
packages/eval-entity-resolver/src/eval_entity_resolver/__pycache__/__init__.cpython-314.pyc CHANGED
Binary files a/packages/eval-entity-resolver/src/eval_entity_resolver/__pycache__/__init__.cpython-314.pyc and b/packages/eval-entity-resolver/src/eval_entity_resolver/__pycache__/__init__.cpython-314.pyc differ
 
packages/eval-entity-resolver/src/eval_entity_resolver/__pycache__/display.cpython-311.pyc ADDED
Binary file (7.88 kB). View file
 
packages/eval-entity-resolver/src/eval_entity_resolver/__pycache__/display.cpython-314.pyc ADDED
Binary file (7.83 kB). View file
 
packages/eval-entity-resolver/src/eval_entity_resolver/display.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Display-name humanization for canonical model slugs.
2
+
3
+ Single source of truth for converting machine slugs (`gpt-4o-2024-05-13`) into
4
+ human-friendly display names (`GPT-4o (2024-05-13)`). Used by refresh scripts
5
+ and the seed migration; consumers (frontend, API) should NOT re-humanize but
6
+ read `canonical_models.display_name` directly.
7
+
8
+ Rules in priority order:
9
+ 1. Strip org prefix (`openai/gpt-5` -> `gpt-5`).
10
+ 2. Strip and parenthesize a trailing date suffix:
11
+ - `-YYYY-MM-DD` -> ` (YYYY-MM-DD)`
12
+ - `-YYYYMMDD` -> ` (YYYY-MM-DD)`; `-YYYY-MM` / `-MM-YYYY` -> ` (YYYY-MM)`
13
+ - `-MMDD` (4-digit) -> ` (MMDD)`
14
+ 3. Per-token formatting:
15
+ - Known acronyms render uppercase (`gpt` -> `GPT`).
16
+ - Mixed-case overrides apply (`moe` -> `MoE`).
17
+ - Param sizes uppercase the unit (`7b` -> `7B`, `a22b` -> `A22B`,
18
+ `8x7b` -> `8x7B`, `30m` -> `30M`).
19
+ - Number+letter version tags preserve case (`4o` -> `4o`).
20
+ - O-series stays lowercase (`o1`, `o3`).
21
+ - Vendor-name overrides (`deepseek` -> `DeepSeek`).
22
+ - Default: capitalize first letter.
23
+ 4. Glue an acronym token to the next token with a hyphen when the next
24
+ token is a bare version number (digits + optional `.NN` + optional
25
+ single non-size letter): `GPT 5 Mini` -> `GPT-5 Mini`,
26
+ `GPT 4o ...` -> `GPT-4o ...`. Skipped when the next token is a param
27
+ size like `7B`.
28
+ """
29
+
30
+ from __future__ import annotations
31
+
32
+ import re
33
+
34
+ ACRONYMS: frozenset[str] = frozenset(
35
+ {
36
+ "gpt",
37
+ "glm",
38
+ "llm",
39
+ "vl",
40
+ "vlm",
41
+ "qvq",
42
+ "qwq",
43
+ "mt",
44
+ "vit",
45
+ "clip",
46
+ "dit",
47
+ "hf",
48
+ "ocr",
49
+ "tts",
50
+ "asr",
51
+ "moe",
52
+ "mlp",
53
+ "rlhf",
54
+ }
55
+ )
56
+
57
+ # Tokens whose canonical rendering is mixed case rather than ALL CAPS.
58
+ CASE_OVERRIDES: dict[str, str] = {
59
+ "moe": "MoE",
60
+ "vit": "ViT",
61
+ "dit": "DiT",
62
+ }
63
+
64
+ # Vendor / family tokens whose canonical rendering doesn't match a simple
65
+ # capitalize() — e.g., `deepseek` should display as `DeepSeek`. Keep the
66
+ # list short; this is for tokens the auto-rule mangles, not a general
67
+ # branding registry.
68
+ TOKEN_OVERRIDES: dict[str, str] = {
69
+ "deepseek": "DeepSeek",
70
+ "openai": "OpenAI",
71
+ "stepfun": "StepFun",
72
+ "moonshotai": "MoonshotAI",
73
+ "mistralai": "MistralAI",
74
+ }
75
+
76
+ # Suffixes treated as parameter-count units, NOT version letters. When a
77
+ # token like `7b` appears after an acronym, we do NOT hyphen-glue it.
78
+ _SIZE_SUFFIXES: frozenset[str] = frozenset({"b", "m", "k"})
79
+
80
+
81
def humanize_model_slug(slug: str) -> str:
    """Render a model slug as a human display name.

    Accepts a bare slug (`gpt-4o-2024-05-13`) or a full canonical id
    (`openai/gpt-4o-2024-05-13`); everything up to the first `/` is dropped
    as the org prefix. Surrounding whitespace is ignored, and empty tokens
    produced by doubled, leading, or trailing hyphens are skipped so they
    cannot leak stray spaces into the display name.

    Returns an empty string for empty or whitespace-only input.
    """
    slug = slug.strip() if slug else ""
    if not slug:
        return ""
    if "/" in slug:
        slug = slug.split("/", 1)[1]

    slug, suffix = _strip_date_suffix(slug)

    # Drop empty tokens (`gpt--4`, `model-`) instead of formatting them;
    # otherwise the final join would emit doubled or dangling spaces.
    tokens = [t for t in slug.split("-") if t]
    formatted = [_format_token(t) for t in tokens]

    out: list[str] = []
    i = 0
    while i < len(tokens):
        # Glue `ACRONYM` + bare version with a hyphen (`GPT` + `4o` ->
        # `GPT-4o`); param sizes such as `7b` are rejected by
        # `_is_version_token` and stay space-separated.
        glue = (
            i + 1 < len(tokens)
            and tokens[i].lower() in ACRONYMS
            and _is_version_token(tokens[i + 1])
        )
        if glue:
            out.append(f"{formatted[i]}-{formatted[i + 1]}")
            i += 2
        else:
            out.append(formatted[i])
            i += 1

    # `suffix` starts with a space; strip so a slug that was nothing but a
    # date/code does not render with leading whitespace.
    return (" ".join(out) + suffix).strip()
113
+
114
+
115
+ def _strip_date_suffix(slug: str) -> tuple[str, str]:
116
+ """Pop a trailing date or 4-digit code; return (slug_without, ' (suffix)').
117
+
118
+ Order matters: more specific patterns first, since a partial match
119
+ against a less-specific pattern would mis-render (e.g. `2025` as a
120
+ bare 4-digit code when it's actually the year half of `2025-08`).
121
+ """
122
+ # Full ISO date: `-YYYY-MM-DD`
123
+ m = re.search(r"-(20\d{2}-\d{2}-\d{2})$", slug)
124
+ if m:
125
+ return slug[: m.start()], f" ({m.group(1)})"
126
+ # Compact date: `-YYYYMMDD`
127
+ m = re.search(r"-(20\d{6})$", slug)
128
+ if m:
129
+ d = m.group(1)
130
+ return slug[: m.start()], f" ({d[:4]}-{d[4:6]}-{d[6:8]})"
131
+ # Year-month: `-YYYY-MM` (e.g. `gpt-5-2025-08`)
132
+ m = re.search(r"-(20\d{2})-(\d{2})$", slug)
133
+ if m:
134
+ return slug[: m.start()], f" ({m.group(1)}-{m.group(2)})"
135
+ # Cohere convention: `-MM-YYYY` (e.g. `command-r-08-2024`).
136
+ # Render as `(YYYY-MM)` for ISO-ordered display.
137
+ m = re.search(r"-(\d{2})-(20\d{2})$", slug)
138
+ if m:
139
+ return slug[: m.start()], f" ({m.group(2)}-{m.group(1)})"
140
+ # Bare 4-digit code: `-NNNN` (e.g. `grok-4-0709`, `kimi-k2-0711`).
141
+ m = re.search(r"-(\d{4})$", slug)
142
+ if m:
143
+ return slug[: m.start()], f" ({m.group(1)})"
144
+ return slug, ""
145
+
146
+
147
def _format_token(tok: str) -> str:
    """Format one hyphen-delimited slug token for display.

    Lookups are case-insensitive and applied in priority order:
    mixed-case overrides, acronyms (uppercased), vendor/family overrides,
    parameter-size forms (unit letter uppercased), version tags and
    o-series names (lowercased), then a default that capitalizes the first
    letter when the token starts with a letter. An empty token is returned
    unchanged.
    """
    if not tok:
        return tok

    key = tok.lower()

    # Table-driven renderings first; mixed-case wins over plain uppercase.
    if key in CASE_OVERRIDES:
        return CASE_OVERRIDES[key]
    if key in ACRONYMS:
        return key.upper()
    if key in TOKEN_OVERRIDES:
        return TOKEN_OVERRIDES[key]

    body, unit = key[:-1], key[-1]

    # Param size: 7b, 70b, 1.5b, 30m
    if re.fullmatch(r"\d+(?:\.\d+)?[bmk]", key):
        return body + unit.upper()
    # MoE active-expert form: a22b, a3b
    if re.fullmatch(r"a\d+(?:\.\d+)?b", key):
        return "A" + key[1:-1] + "B"
    # MxNb: 8x7b -> 8x7B
    if re.fullmatch(r"\d+x\d+(?:\.\d+)?b", key):
        return body + "B"

    # Version tags (`4o`, `5o`: number + one non-size letter) and o-series
    # names (`o1`, `o3`, `o4`) are both rendered lowercase.
    is_version_tag = (
        re.fullmatch(r"\d+(?:\.\d+)?[a-z]", key) is not None
        and unit not in _SIZE_SUFFIXES
    )
    if is_version_tag or re.fullmatch(r"o\d+", key):
        return key

    # Default: capitalize first letter, preserve rest; tokens that start
    # with a non-letter pass through untouched.
    if not tok[0].isalpha():
        return tok
    return tok[0].upper() + tok[1:]
175
+
176
+
177
def _is_version_token(tok: str) -> bool:
    """Tell whether `tok` is a bare version rather than a parameter size.

    A version is digits, an optional `.NN` part, and at most one trailing
    letter that is not a size unit (`5`, `4.5`, `4o`). Tokens such as `7b`
    or `70m` end in a size unit and are rejected. Matching is
    case-insensitive.
    """
    match = re.fullmatch(r"(\d+(?:\.\d+)?)([a-z]?)", tok.lower())
    # Group 2 is the optional trailing letter ("" when absent, which is
    # never a size suffix).
    return match is not None and match.group(2) not in _SIZE_SUFFIXES
src/eval_card_registry/__pycache__/cli.cpython-314.pyc CHANGED
Binary files a/src/eval_card_registry/__pycache__/cli.cpython-314.pyc and b/src/eval_card_registry/__pycache__/cli.cpython-314.pyc differ
 
src/eval_card_registry/services/__pycache__/hub_stats.cpython-314.pyc CHANGED
Binary files a/src/eval_card_registry/services/__pycache__/hub_stats.cpython-314.pyc and b/src/eval_card_registry/services/__pycache__/hub_stats.cpython-314.pyc differ
 
src/eval_card_registry/services/__pycache__/resolution_service.cpython-314.pyc CHANGED
Binary files a/src/eval_card_registry/services/__pycache__/resolution_service.cpython-314.pyc and b/src/eval_card_registry/services/__pycache__/resolution_service.cpython-314.pyc differ
 
src/eval_card_registry/services/hub_stats.py CHANGED
@@ -199,9 +199,22 @@ class HubStatsClient:
199
  return self._con
200
  # Import lazily so processes that never call lookup() don't pay
201
  # the duckdb import cost.
 
202
  import duckdb
203
  con = duckdb.connect()
204
  con.execute("INSTALL httpfs; LOAD httpfs;")
 
 
 
 
 
 
 
 
 
 
 
 
205
  self._con = con
206
  return con
207
 
 
199
  return self._con
200
  # Import lazily so processes that never call lookup() don't pay
201
  # the duckdb import cost.
202
+ import os
203
  import duckdb
204
  con = duckdb.connect()
205
  con.execute("INSTALL httpfs; LOAD httpfs;")
206
+ # Authenticate parquet fetches when HF_TOKEN is in the environment
207
+ # (typical on the deployed Space). Unauth limit is 500 req/5min;
208
+ # one DuckDB read_parquet against the remote file streams via
209
+ # several range requests and a sync that auto-creates many drafts
210
+ # can brush that ceiling. With auth the ceiling is ~30k/5min.
211
+ hf_token = os.environ.get("HF_TOKEN")
212
+ if hf_token:
213
+ escaped = hf_token.replace("'", "''")
214
+ con.execute(
215
+ f"CREATE SECRET hf_auth (TYPE HTTP, BEARER_TOKEN '{escaped}', "
216
+ f"SCOPE 'https://huggingface.co');"
217
+ )
218
  self._con = con
219
  return con
220
 
src/eval_card_registry/services/resolution_service.py CHANGED
@@ -15,6 +15,7 @@ from datetime import datetime, timezone
15
  from typing import Optional
16
 
17
  from eval_entity_resolver import AliasStore, CanonicalStore, Resolver, ResolverConfig, ResolutionResult
 
18
 
19
  from eval_card_registry.config import settings
20
  from eval_card_registry.store.hf_store import RegistryStore
@@ -294,9 +295,17 @@ class ResolutionService:
294
  candidate_id = f"{candidate_id}-{str(uuid.uuid4())[:8]}"
295
 
296
  now = _now()
 
 
 
 
 
 
 
 
297
  base = {
298
  "id": candidate_id,
299
- "display_name": raw_value,
300
  "metadata": "{}",
301
  "review_status": "draft",
302
  "created_at": now,
 
15
  from typing import Optional
16
 
17
  from eval_entity_resolver import AliasStore, CanonicalStore, Resolver, ResolverConfig, ResolutionResult
18
+ from eval_entity_resolver.display import humanize_model_slug
19
 
20
  from eval_card_registry.config import settings
21
  from eval_card_registry.store.hf_store import RegistryStore
 
295
  candidate_id = f"{candidate_id}-{str(uuid.uuid4())[:8]}"
296
 
297
  now = _now()
298
+ # Models get a humanized display name (`gpt-5-2025-08-07` ->
299
+ # `GPT-5 (2025-08-07)`); other entity types pass `raw_value`
300
+ # through β€” benchmark/metric/harness/org names are usually
301
+ # already in their preferred display form.
302
+ if entity_type == "model":
303
+ display = humanize_model_slug(raw_value) or raw_value
304
+ else:
305
+ display = raw_value
306
  base = {
307
  "id": candidate_id,
308
+ "display_name": display,
309
  "metadata": "{}",
310
  "review_status": "draft",
311
  "created_at": now,
src/eval_card_registry/store/__pycache__/queries.cpython-314.pyc CHANGED
Binary files a/src/eval_card_registry/store/__pycache__/queries.cpython-314.pyc and b/src/eval_card_registry/store/__pycache__/queries.cpython-314.pyc differ
 
src/eval_card_registry/store/queries.py CHANGED
@@ -85,9 +85,12 @@ def derive_model_lineage_fields(store: RegistryStore) -> dict[str, int]:
85
  `root_model_id`, `lineage_origin_org_id`, and inherited `open_weights`
86
  columns.
87
 
88
- - `root_model_id`: walk parents up through *only* `quantized` edges
89
- (identity-preserving chain). NULL when self has no quantized
90
- ancestor — i.e., self IS the identity root.
 
 
 
91
  - `lineage_origin_org_id`: walk through any non-`variant` edge
92
  (quantized / finetune / merge / adapter) to the deepest ancestor,
93
  then read its `org_id`. For Meta-originated models = self.org_id;
@@ -117,8 +120,8 @@ def derive_model_lineage_fields(store: RegistryStore) -> dict[str, int]:
117
  ow = row.get("open_weights")
118
  open_by_id[cid] = None if _is_na(ow) else bool(ow)
119
 
120
- def _walk(start: str, allowed: set[str]) -> str:
121
- """Walk parents through edges whose relationship is in `allowed`.
122
  Returns the deepest reachable id; stops on no-match or cycle."""
123
  visited = {start}
124
  current = start
@@ -128,7 +131,7 @@ def derive_model_lineage_fields(store: RegistryStore) -> dict[str, int]:
128
  for p in edges:
129
  if not isinstance(p, dict):
130
  continue
131
- if p.get("relationship") in allowed and p.get("id"):
132
  next_id = p["id"]
133
  break
134
  if not next_id or next_id in visited or next_id not in parents_by_id:
@@ -136,6 +139,17 @@ def derive_model_lineage_fields(store: RegistryStore) -> dict[str, int]:
136
  visited.add(next_id)
137
  current = next_id
138
 
 
 
 
 
 
 
 
 
 
 
 
139
  def _inherit_open_from_ancestors(start: str) -> Optional[bool]:
140
  """Walk ONLY ancestors (skip self) through `variant` + `quantized`
141
  edges and return the first explicit `open_weights` value found.
@@ -166,11 +180,12 @@ def derive_model_lineage_fields(store: RegistryStore) -> dict[str, int]:
166
  open_updates: dict[str, Optional[bool]] = {}
167
  inherited_count = 0
168
  for cid in parents_by_id:
169
- # Identity root via quantized-only walk
170
- root = _walk(cid, {"quantized"})
 
171
  root_updates[cid] = root if root != cid else None
172
  # Lineage origin via any non-variant edge; org of deepest ancestor
173
- ancestor = _walk(cid, {"quantized", "finetune", "merge", "adapter"})
174
  lineage_updates[cid] = org_by_id.get(ancestor) or org_by_id.get(cid)
175
  # Open weights — explicit self value WINS; only fall back to
176
  # ancestor inheritance when self has no value set. Never overwrite
 
85
  `root_model_id`, `lineage_origin_org_id`, and inherited `open_weights`
86
  columns.
87
 
88
+ - `root_model_id`: walk parents up through edges that preserve API
89
+ identity — `quantized` (different precision, same model) and
90
+ `variant axis=version` (dated snapshot of the same release, e.g.
91
+ `gpt-4o-2024-05-13` -> `gpt-4o`). NULL when self has no such
92
+ ancestor — i.e., self IS the identity root. Other variant axes
93
+ (size, mode, modality, domain) keep separate identity at the leaf.
94
  - `lineage_origin_org_id`: walk through any non-`variant` edge
95
  (quantized / finetune / merge / adapter) to the deepest ancestor,
96
  then read its `org_id`. For Meta-originated models = self.org_id;
 
120
  ow = row.get("open_weights")
121
  open_by_id[cid] = None if _is_na(ow) else bool(ow)
122
 
123
+ def _walk(start: str, edge_ok) -> str:
124
+ """Walk parents through edges where `edge_ok(edge)` is True.
125
  Returns the deepest reachable id; stops on no-match or cycle."""
126
  visited = {start}
127
  current = start
 
131
  for p in edges:
132
  if not isinstance(p, dict):
133
  continue
134
+ if edge_ok(p) and p.get("id"):
135
  next_id = p["id"]
136
  break
137
  if not next_id or next_id in visited or next_id not in parents_by_id:
 
139
  visited.add(next_id)
140
  current = next_id
141
 
142
+ def _is_identity_edge(p: dict) -> bool:
143
+ rel = p.get("relationship")
144
+ if rel == "quantized":
145
+ return True
146
+ if rel == "variant" and p.get("axis") == "version":
147
+ return True
148
+ return False
149
+
150
+ def _is_lineage_edge(p: dict) -> bool:
151
+ return p.get("relationship") in {"quantized", "finetune", "merge", "adapter"}
152
+
153
  def _inherit_open_from_ancestors(start: str) -> Optional[bool]:
154
  """Walk ONLY ancestors (skip self) through `variant` + `quantized`
155
  edges and return the first explicit `open_weights` value found.
 
180
  open_updates: dict[str, Optional[bool]] = {}
181
  inherited_count = 0
182
  for cid in parents_by_id:
183
+ # Identity root via quantized + variant-version walk (both treat
184
+ # the parent as the same model at the API level — see docstring).
185
+ root = _walk(cid, _is_identity_edge)
186
  root_updates[cid] = root if root != cid else None
187
  # Lineage origin via any non-variant edge; org of deepest ancestor
188
+ ancestor = _walk(cid, _is_lineage_edge)
189
  lineage_updates[cid] = org_by_id.get(ancestor) or org_by_id.get(cid)
190
  # Open weights — explicit self value WINS; only fall back to
191
  # ancestor inheritance when self has no value set. Never overwrite