j-chim committed on
Commit
e70d416
·
verified ·
1 Parent(s): 5922ac4

Upload folder using huggingface_hub

Browse files
packages/eval-entity-resolver/src/eval_entity_resolver/__pycache__/alias_store.cpython-314.pyc CHANGED
Binary files a/packages/eval-entity-resolver/src/eval_entity_resolver/__pycache__/alias_store.cpython-314.pyc and b/packages/eval-entity-resolver/src/eval_entity_resolver/__pycache__/alias_store.cpython-314.pyc differ
 
packages/eval-entity-resolver/src/eval_entity_resolver/alias_store.py CHANGED
@@ -1,5 +1,6 @@
1
  from __future__ import annotations
2
 
 
3
  import uuid
4
  from datetime import datetime, timezone
5
  from pathlib import Path
@@ -7,6 +8,8 @@ from typing import Optional
7
 
8
  import pandas as pd
9
 
 
 
10
 
11
  _SCHEMA = {
12
  "id": pd.StringDtype(),
@@ -71,15 +74,41 @@ class AliasStore:
71
  @classmethod
72
  def from_parquet(cls, path: str | Path, read_only: bool = False) -> "AliasStore":
73
  p = Path(path) / "aliases.parquet"
74
- if p.exists():
 
 
 
 
 
 
 
 
 
75
  df = pd.read_parquet(p)
76
- else:
 
 
 
 
 
 
 
 
 
 
 
 
77
  df = _empty_df()
78
  return cls(df, read_only=read_only)
79
 
80
  @classmethod
81
  def from_hf(cls, repo_id: str, read_only: bool = False) -> "AliasStore":
82
  from huggingface_hub import hf_hub_download
 
 
 
 
 
83
 
84
  try:
85
  local = hf_hub_download(
@@ -88,7 +117,36 @@ class AliasStore:
88
  repo_type="dataset",
89
  )
90
  df = pd.read_parquet(local)
91
- except Exception:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  df = _empty_df()
93
  return cls(df, read_only=read_only)
94
 
 
1
  from __future__ import annotations
2
 
3
+ import logging
4
  import uuid
5
  from datetime import datetime, timezone
6
  from pathlib import Path
 
8
 
9
  import pandas as pd
10
 
11
+ logger = logging.getLogger(__name__)
12
+
13
 
14
  _SCHEMA = {
15
  "id": pd.StringDtype(),
 
74
  @classmethod
75
  def from_parquet(cls, path: str | Path, read_only: bool = False) -> "AliasStore":
76
  p = Path(path) / "aliases.parquet"
77
+ if not p.exists():
78
+ # Missing dir / missing file is the legitimate "fresh store"
79
+ # case (used by tests and first-time seed runs), so log at INFO
80
+ # instead of WARNING — but still surface it.
81
+ logger.info(
82
+ "AliasStore.from_parquet: %s not found; falling back to empty store",
83
+ p,
84
+ )
85
+ return cls(_empty_df(), read_only=read_only)
86
+ try:
87
  df = pd.read_parquet(p)
88
+ except (OSError, ValueError) as exc:
89
+ # OSError covers permission / IO errors; ValueError is what
90
+ # pyarrow raises for corrupt parquet (ArrowInvalid is a
91
+ # ValueError subclass). Keep the empty-store fallback so a
92
+ # corrupt local cache doesn't hard-crash callers, but log so
93
+ # the failure isn't silent.
94
+ logger.warning(
95
+ "AliasStore.from_parquet: failed to read %s (%s: %s); "
96
+ "falling back to empty store",
97
+ p,
98
+ type(exc).__name__,
99
+ exc,
100
+ )
101
  df = _empty_df()
102
  return cls(df, read_only=read_only)
103
 
104
  @classmethod
105
  def from_hf(cls, repo_id: str, read_only: bool = False) -> "AliasStore":
106
  from huggingface_hub import hf_hub_download
107
+ from huggingface_hub.errors import (
108
+ EntryNotFoundError,
109
+ HfHubHTTPError,
110
+ RepositoryNotFoundError,
111
+ )
112
 
113
  try:
114
  local = hf_hub_download(
 
117
  repo_type="dataset",
118
  )
119
  df = pd.read_parquet(local)
120
+ except (
121
+ RepositoryNotFoundError,
122
+ EntryNotFoundError,
123
+ HfHubHTTPError,
124
+ FileNotFoundError,
125
+ OSError,
126
+ ValueError,
127
+ ) as exc:
128
+ # Specific catches:
129
+ # - RepositoryNotFoundError: repo missing or auth failure
130
+ # (HF returns 401 disguised as 404 when token is invalid).
131
+ # - EntryNotFoundError: repo exists but aliases/part-0.parquet
132
+ # hasn't been seeded yet.
133
+ # - HfHubHTTPError: catch-all for other HTTP failures
134
+ # (network errors, 5xx, rate limits).
135
+ # - FileNotFoundError / OSError: filesystem-level errors
136
+ # reading the downloaded file.
137
+ # - ValueError: pyarrow.lib.ArrowInvalid (parquet corruption)
138
+ # subclasses ValueError.
139
+ # We keep the fallback-to-empty recovery (callers expect the
140
+ # store to construct), but emit a warning so the failure is
141
+ # visible — silent fallback was masking auth and corruption
142
+ # issues during deploys.
143
+ logger.warning(
144
+ "AliasStore.from_hf: failed to load aliases from %r (%s: %s); "
145
+ "falling back to empty store",
146
+ repo_id,
147
+ type(exc).__name__,
148
+ exc,
149
+ )
150
  df = _empty_df()
151
  return cls(df, read_only=read_only)
152
 
packages/eval-entity-resolver/src/eval_entity_resolver/strategies/__pycache__/fuzzy.cpython-314.pyc CHANGED
Binary files a/packages/eval-entity-resolver/src/eval_entity_resolver/strategies/__pycache__/fuzzy.cpython-314.pyc and b/packages/eval-entity-resolver/src/eval_entity_resolver/strategies/__pycache__/fuzzy.cpython-314.pyc differ
 
packages/eval-entity-resolver/src/eval_entity_resolver/strategies/fuzzy.py CHANGED
@@ -62,6 +62,20 @@ _STRIP_SUFFIX_PATTERNS: list[re.Pattern[str]] = [
62
  # Known org aliases: {variant_prefix: canonical_prefix}
63
  # Convention: simplify HF org names (e.g. "deepseek-ai" → "deepseek") to the
64
  # shorter form used as canonical in this registry.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  _ORG_ALIASES: dict[str, str] = {
66
  "deepseek-ai": "deepseek",
67
  "cohereforai": "cohere",
@@ -70,8 +84,152 @@ _ORG_ALIASES: dict[str, str] = {
70
  "meta-llama": "meta",
71
  "mistral-ai": "mistralai",
72
  "nvidia-nemo": "nvidia",
 
 
 
 
 
 
 
 
73
  }
74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  # Confidence assigned to stem-match results. Below 1.0 (exact) and 0.95
76
  # (normalized) so the provenance is clear in the resolution log.
77
  _STEM_CONFIDENCE = 0.90
@@ -113,6 +271,13 @@ def fuzzy_match(
113
 
114
  Returns ``(canonical_id, confidence)``; canonical_id is None on no match.
115
  """
 
 
 
 
 
 
 
116
  candidates_to_try: list[str] = []
117
 
118
  # 1. Suffix stripping (may produce multiple stems: strip one, strip two, etc.)
@@ -124,13 +289,38 @@ def fuzzy_match(
124
  if double:
125
  candidates_to_try.append(double)
126
 
127
- # 2. Org normalization — on both original and stripped forms
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  for val in [raw_value] + candidates_to_try[:]:
129
  rewritten = _normalize_org(val)
130
  if rewritten:
131
  candidates_to_try.append(rewritten)
132
 
133
- # 3. Check each candidate against exact then normalized lookups.
134
  # Scoped-aware: config-scoped aliases for ``source_config`` count as
135
  # candidates; unrelated scoped aliases are excluded.
136
  norm_lookup = alias_store.get_normalized_lookup(entity_type, source_config)
 
62
  # Known org aliases: {variant_prefix: canonical_prefix}
63
  # Convention: simplify HF org names (e.g. "deepseek-ai" → "deepseek") to the
64
  # shorter form used as canonical in this registry.
65
+ #
66
+ # Zhipu/Z.ai cluster: the GLM-family canonical org is `zai` (short form used
67
+ # in this registry for canonical_ids like `zai/glm-4.5`). HF and various
68
+ # leaderboards spell it as `zhipu`, `zhipu-ai`, `z-ai`, or `zai-org` — all
69
+ # refer to the same Beijing AI startup behind GLM.
70
+ #
71
+ # Moonshot AI cluster: canonical org is `moonshotai` (matches HF
72
+ # `moonshotai/Kimi-*` namespace); aliases cover `moonshot` and `moonshot-ai`
73
+ # spellings seen in the corpus.
74
+ #
75
+ # `alibaba` → `qwen` was considered but skipped: the corpus has 1
76
+ # non-Qwen entry (`alibaba__mineru2-pipeline`) which would be wrongly
77
+ # rewritten. Qwen models under `alibaba/` are handled via explicit
78
+ # overrides instead.
79
  _ORG_ALIASES: dict[str, str] = {
80
  "deepseek-ai": "deepseek",
81
  "cohereforai": "cohere",
 
84
  "meta-llama": "meta",
85
  "mistral-ai": "mistralai",
86
  "nvidia-nemo": "nvidia",
87
+ # Zhipu/Z.ai → zai
88
+ "zhipu": "zai",
89
+ "zhipu-ai": "zai",
90
+ "z-ai": "zai",
91
+ "zai-org": "zai",
92
+ # Moonshot → moonshotai
93
+ "moonshot": "moonshotai",
94
+ "moonshot-ai": "moonshotai",
95
  }
96
 
97
+ # Host / gateway / placeholder prefixes that should be DROPPED entirely
98
+ # (not rewritten to a canonical org). These are not model authors —
99
+ # they're hosting platforms, gateways, or placeholders for missing
100
+ # developer fields. When raw_value uses one of these as the org prefix,
101
+ # the resolver tries the bare suffix in addition to the full string.
102
+ #
103
+ # Identified from corpus surveys: alphaxiv leaderboard uses `unknown/`
104
+ # when developer field is absent; Bedrock/Vertex/Azure/Fireworks/etc.
105
+ # are inference platforms re-hosting other companies' models.
106
+ _HOST_PREFIXES_TO_STRIP: set[str] = {
107
+ "unknown",
108
+ "bedrock", "amazon-bedrock", "aws-bedrock",
109
+ "azure", "azure-openai", "azure-cognitive-services",
110
+ "vertex", "google-vertex", "vertex-anthropic",
111
+ "fireworks", "fireworks-ai",
112
+ "groq",
113
+ "together", "togetherai", "together-ai",
114
+ "openrouter",
115
+ "perplexity-agent",
116
+ "deepinfra", "anyscale", "novita", "novita-ai", "replicate",
117
+ "ollama", "ollama-cloud",
118
+ "github-models", "github-copilot",
119
+ "lambda", "baseten", "modal", "runpod", "cerebras",
120
+ "sap-ai-core", "cloudflare-ai-gateway", "aihubmix",
121
+ "kilo", "vercel", "llmgateway", "poe",
122
+ }
123
+
124
+
125
+ def _drop_duplicated_org_prefix(value: str) -> str | None:
126
+ """Detect and collapse a repeated-org-prefix typo.
127
+
128
+ Recognized shapes (token equality is case-insensitive, but the
129
+ returned string preserves the original casing of `value` so the
130
+ downstream lookups can still match exact aliases):
131
+
132
+ - ``<org>/<org>-<rest>`` → ``<org>/<rest>``
133
+ - ``<org>/<org>_<rest>`` → ``<org>/<rest>``
134
+ - ``<org>/<org>/<rest>`` → ``<org>/<rest>`` (literal double slash)
135
+ - ``<org>__<org>-<rest>`` → ``<org>__<rest>`` (slug form;
136
+ the pipeline rewrites ``/`` → ``__`` for route_ids and the resolver
137
+ may receive either)
138
+ - ``<org>__<org>__<rest>`` → ``<org>__<rest>`` (slug form
139
+ of the literal double-slash variant)
140
+
141
+ Returns ``None`` when the prefix is not duplicated, or when the
142
+ repeated-prefix slug shape is followed by something that doesn't
143
+ cleanly separate (e.g. ``gpt-4/gpt-4-turbo`` — the second ``gpt-4``
144
+ is the START of the model name, not a duplicated prefix).
145
+
146
+ The match requires exact token equality of the two leading tokens.
147
+ A substring overlap (``gpt-4`` ⊂ ``gpt-4-turbo``) is intentionally
148
+ NOT enough — that's a real two-segment HF path, not a typo.
149
+
150
+ To disambiguate the org-typo case (``openai/openai-o1``) from the
151
+ model-family-prefix case (``gpt-4/gpt-4-turbo``): the heuristic
152
+ only fires when the leading org token has no internal hyphen.
153
+ Real org names (``openai``, ``moonshotai``, ``anthropic``) are
154
+ single tokens; model-family prefixes (``gpt-4``, ``llama-3``,
155
+ ``claude-opus-4-5``) contain hyphens. This is imperfect — a
156
+ hyphenated org like ``mistral-ai`` would slip through — those are
157
+ left to the org-alias pass (NOTE: ``fuzzy_match`` runs it after this step).
158
+ """
159
+ if not value:
160
+ return None
161
+
162
+ # Slash forms first (canonical HF path style).
163
+ if "/" in value:
164
+ first_slash = value.index("/")
165
+ org = value[:first_slash]
166
+ rest = value[first_slash + 1:]
167
+ if not org or not rest:
168
+ return None
169
+ # Skip when the leading token contains a hyphen — likely a
170
+ # model-family prefix (e.g. `gpt-4/gpt-4-turbo`), not a
171
+ # duplicated-org typo. Hyphenated orgs like `mistral-ai` are
172
+ # canonicalized by the separate org-alias pass (run after this step in fuzzy_match).
173
+ if "-" in org:
174
+ return None
175
+ org_lower = org.lower()
176
+ # `<org>/<org>/<rest>` literal double slash
177
+ if "/" in rest:
178
+ second, after = rest.split("/", 1)
179
+ if second.lower() == org_lower and after:
180
+ return f"{org}/{after}"
181
+ # `<org>/<org>-<rest>` and `<org>/<org>_<rest>`
182
+ for sep in ("-", "_"):
183
+ prefix = org_lower + sep
184
+ if rest.lower().startswith(prefix) and len(rest) > len(prefix):
185
+ return f"{org}/{rest[len(prefix):]}"
186
+
187
+ # Slug forms (route_id style with `__`).
188
+ if "__" in value:
189
+ first = value.index("__")
190
+ org = value[:first]
191
+ rest = value[first + 2:]
192
+ if not org or not rest:
193
+ return None
194
+ # Same hyphen-in-org guard (see slash branch above).
195
+ if "-" in org:
196
+ return None
197
+ org_lower = org.lower()
198
+ # `<org>__<org>__<rest>`
199
+ if "__" in rest:
200
+ second, after = rest.split("__", 1)
201
+ if second.lower() == org_lower and after:
202
+ return f"{org}__{after}"
203
+ # `<org>__<org>-<rest>` (and `_<rest>` — note we already consumed `__`,
204
+ # so the next separator is a single `-` or `_`).
205
+ for sep in ("-", "_"):
206
+ prefix = org_lower + sep
207
+ if rest.lower().startswith(prefix) and len(rest) > len(prefix):
208
+ return f"{org}__{rest[len(prefix):]}"
209
+
210
+ return None
211
+
212
+
213
+ def _drop_host_prefix(value: str) -> str | None:
214
+ """If value's developer prefix is a known hosting platform, return the
215
+ bare suffix portion (everything after the first separator). Otherwise None.
216
+
217
+ Handles both `host/model` and `host.model` separators."""
218
+ if "/" in value:
219
+ org, rest = value.split("/", 1)
220
+ if org.lower() in _HOST_PREFIXES_TO_STRIP and rest:
221
+ return rest
222
+ if "." in value:
223
+ # Dot-separated host form: "bedrock.claude-3-5-sonnet" → "claude-3-5-sonnet".
224
+ # A vendor-dotted ID such as "anthropic.claude-3-5-sonnet" is not stripped,
225
+ # because "anthropic" is not a host: only strip if everything-before-first-dot is a host name.
226
+ first_dot = value.index(".")
227
+ org = value[:first_dot]
228
+ rest = value[first_dot + 1:]
229
+ if org.lower() in _HOST_PREFIXES_TO_STRIP and rest:
230
+ return rest
231
+ return None
232
+
233
  # Confidence assigned to stem-match results. Below 1.0 (exact) and 0.95
234
  # (normalized) so the provenance is clear in the resolution log.
235
  _STEM_CONFIDENCE = 0.90
 
271
 
272
  Returns ``(canonical_id, confidence)``; canonical_id is None on no match.
273
  """
274
+ # The heuristics below are intentionally model-specific: they strip
275
+ # hosting prefixes, org aliases, dated model snapshots, and inference-mode
276
+ # suffixes. Applying them to benchmarks/metrics/harnesses can merge
277
+ # unrelated entities that merely share a host-like prefix or model-ish tail.
278
+ if entity_type != "model":
279
+ return None, 0.0
280
+
281
  candidates_to_try: list[str] = []
282
 
283
  # 1. Suffix stripping (may produce multiple stems: strip one, strip two, etc.)
 
289
  if double:
290
  candidates_to_try.append(double)
291
 
292
+ # 2. Host-prefix dropping — if raw_value's developer prefix is a known
293
+ # hosting platform / gateway / placeholder, also try the bare suffix.
294
+ # Apply on the original AND any suffix-stripped forms.
295
+ for val in [raw_value] + candidates_to_try[:]:
296
+ bare = _drop_host_prefix(val)
297
+ if bare:
298
+ candidates_to_try.append(bare)
299
+ # The bare form might itself need suffix stripping
300
+ stripped_bare = _strip_suffix(bare)
301
+ if stripped_bare:
302
+ candidates_to_try.append(stripped_bare)
303
+
304
+ # 3. Duplicated-org-prefix collapse — catches typos like
305
+ # `moonshotai/moonshotai-kimi-k2-instruct` (and the slug-form
306
+ # `moonshotai__moonshotai-kimi-k2-instruct`). Runs AFTER suffix /
307
+ # host strip so the deduped form goes through the rest of the
308
+ # pipeline (org alias + lookup), and BEFORE org alias so the
309
+ # collapsed string can pick up `_ORG_ALIASES` rewriting on the
310
+ # next step.
311
+ for val in [raw_value] + candidates_to_try[:]:
312
+ deduped = _drop_duplicated_org_prefix(val)
313
+ if deduped:
314
+ candidates_to_try.append(deduped)
315
+
316
+ # 4. Org normalization — on original, suffix-stripped, host-stripped,
317
+ # and duplicate-org-collapsed forms.
318
  for val in [raw_value] + candidates_to_try[:]:
319
  rewritten = _normalize_org(val)
320
  if rewritten:
321
  candidates_to_try.append(rewritten)
322
 
323
+ # 5. Check each candidate against exact then normalized lookups.
324
  # Scoped-aware: config-scoped aliases for ``source_config`` count as
325
  # candidates; unrelated scoped aliases are excluded.
326
  norm_lookup = alias_store.get_normalized_lookup(entity_type, source_config)
src/eval_card_registry/__pycache__/cli.cpython-314.pyc CHANGED
Binary files a/src/eval_card_registry/__pycache__/cli.cpython-314.pyc and b/src/eval_card_registry/__pycache__/cli.cpython-314.pyc differ
 
src/eval_card_registry/__pycache__/main.cpython-314.pyc CHANGED
Binary files a/src/eval_card_registry/__pycache__/main.cpython-314.pyc and b/src/eval_card_registry/__pycache__/main.cpython-314.pyc differ
 
src/eval_card_registry/api/__pycache__/routes_entities.cpython-314.pyc CHANGED
Binary files a/src/eval_card_registry/api/__pycache__/routes_entities.cpython-314.pyc and b/src/eval_card_registry/api/__pycache__/routes_entities.cpython-314.pyc differ
 
src/eval_card_registry/api/__pycache__/routes_orgs.cpython-314.pyc CHANGED
Binary files a/src/eval_card_registry/api/__pycache__/routes_orgs.cpython-314.pyc and b/src/eval_card_registry/api/__pycache__/routes_orgs.cpython-314.pyc differ
 
src/eval_card_registry/api/__pycache__/schemas.cpython-314.pyc CHANGED
Binary files a/src/eval_card_registry/api/__pycache__/schemas.cpython-314.pyc and b/src/eval_card_registry/api/__pycache__/schemas.cpython-314.pyc differ
 
src/eval_card_registry/api/routes_entities.py CHANGED
@@ -52,6 +52,15 @@ def _decode(entity: dict) -> dict:
52
  return out
53
 
54
 
 
 
 
 
 
 
 
 
 
55
  # ------------------------------------------------------------------
56
  # Models
57
  # ------------------------------------------------------------------
@@ -63,17 +72,29 @@ def list_models(
63
  review_status: Optional[ReviewStatus] = None,
64
  store: RegistryStore = Depends(get_store),
65
  ):
66
- return [_decode(e) for e in queries.list_entities(store, "canonical_models", search=search, review_status=review_status, developer=developer)]
 
 
 
 
 
 
 
 
 
67
 
68
 
69
  @router.get("/models/{model_id:path}")
70
  def get_model(model_id: str, store: RegistryStore = Depends(get_store)):
71
- return _decode(_get_or_404(store, "canonical_models", model_id))
72
 
73
 
74
  @router.post("/models", status_code=201, dependencies=_writable)
75
  def create_model(body: ModelCreate, store: RegistryStore = Depends(get_store)):
76
- return _decode(queries.upsert_entity(store, "canonical_models", _encode(body.model_dump())))
 
 
 
77
 
78
 
79
  @router.patch("/models/{model_id:path}", dependencies=_writable)
@@ -81,7 +102,10 @@ def patch_model(model_id: str, body: ModelPatch, store: RegistryStore = Depends(
81
  _get_or_404(store, "canonical_models", model_id)
82
  data = {k: v for k, v in body.model_dump().items() if v is not None}
83
  data["id"] = model_id
84
- return _decode(queries.upsert_entity(store, "canonical_models", _encode(data)))
 
 
 
85
 
86
 
87
  # ------------------------------------------------------------------
 
52
  return out
53
 
54
 
55
+ def _decode_model(store: RegistryStore, entity: dict) -> dict:
56
+ out = _decode(entity)
57
+ org_id = out.get("org_id")
58
+ if org_id and store.has_table("canonical_orgs"):
59
+ org = queries.get_entity(store, "canonical_orgs", org_id)
60
+ out["developer"] = org.get("display_name") if org else None
61
+ return out
62
+
63
+
64
  # ------------------------------------------------------------------
65
  # Models
66
  # ------------------------------------------------------------------
 
72
  review_status: Optional[ReviewStatus] = None,
73
  store: RegistryStore = Depends(get_store),
74
  ):
75
+ return [
76
+ _decode_model(store, e)
77
+ for e in queries.list_entities(
78
+ store,
79
+ "canonical_models",
80
+ search=search,
81
+ review_status=review_status,
82
+ developer=developer,
83
+ )
84
+ ]
85
 
86
 
87
  @router.get("/models/{model_id:path}")
88
  def get_model(model_id: str, store: RegistryStore = Depends(get_store)):
89
+ return _decode_model(store, _get_or_404(store, "canonical_models", model_id))
90
 
91
 
92
  @router.post("/models", status_code=201, dependencies=_writable)
93
  def create_model(body: ModelCreate, store: RegistryStore = Depends(get_store)):
94
+ return _decode_model(
95
+ store,
96
+ queries.upsert_entity(store, "canonical_models", _encode(body.model_dump())),
97
+ )
98
 
99
 
100
  @router.patch("/models/{model_id:path}", dependencies=_writable)
 
102
  _get_or_404(store, "canonical_models", model_id)
103
  data = {k: v for k, v in body.model_dump().items() if v is not None}
104
  data["id"] = model_id
105
+ return _decode_model(
106
+ store,
107
+ queries.upsert_entity(store, "canonical_models", _encode(data)),
108
+ )
109
 
110
 
111
  # ------------------------------------------------------------------
src/eval_card_registry/api/schemas.py CHANGED
@@ -2,7 +2,7 @@ from typing import Any, Literal, Optional
2
  from pydantic import BaseModel
3
 
4
 
5
- EntityType = Literal["benchmark", "model", "metric", "harness"]
6
  ReviewStatus = Literal["draft", "reviewed"]
7
  AliasStatus = Literal["auto", "uncertain", "confirmed", "rejected"]
8
 
@@ -22,6 +22,7 @@ class ResolveResponse(BaseModel):
22
  confidence: float
23
  created_new: bool
24
  review_status: Optional[str]
 
25
 
26
 
27
  # --- Entities ---
@@ -30,9 +31,11 @@ class ModelCreate(BaseModel):
30
  id: str
31
  display_name: str
32
  developer: Optional[str] = None
 
33
  family: Optional[str] = None
34
  architecture: Optional[str] = None
35
  params_billions: Optional[float] = None
 
36
  tags: list[str] = []
37
  metadata: dict[str, Any] = {}
38
  review_status: str = "draft"
@@ -41,9 +44,11 @@ class ModelCreate(BaseModel):
41
  class ModelPatch(BaseModel):
42
  display_name: Optional[str] = None
43
  developer: Optional[str] = None
 
44
  family: Optional[str] = None
45
  architecture: Optional[str] = None
46
  params_billions: Optional[float] = None
 
47
  tags: Optional[list[str]] = None
48
  metadata: Optional[dict[str, Any]] = None
49
  review_status: Optional[str] = None
@@ -108,6 +113,29 @@ class HarnessPatch(BaseModel):
108
  review_status: Optional[str] = None
109
 
110
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  # --- Aliases ---
112
 
113
  class AliasPatch(BaseModel):
 
2
  from pydantic import BaseModel
3
 
4
 
5
+ EntityType = Literal["benchmark", "model", "metric", "harness", "org"]
6
  ReviewStatus = Literal["draft", "reviewed"]
7
  AliasStatus = Literal["auto", "uncertain", "confirmed", "rejected"]
8
 
 
22
  confidence: float
23
  created_new: bool
24
  review_status: Optional[str]
25
+ parent_canonical_id: Optional[str] = None
26
 
27
 
28
  # --- Entities ---
 
31
  id: str
32
  display_name: str
33
  developer: Optional[str] = None
34
+ org_id: Optional[str] = None
35
  family: Optional[str] = None
36
  architecture: Optional[str] = None
37
  params_billions: Optional[float] = None
38
+ parent_model_id: Optional[str] = None
39
  tags: list[str] = []
40
  metadata: dict[str, Any] = {}
41
  review_status: str = "draft"
 
44
  class ModelPatch(BaseModel):
45
  display_name: Optional[str] = None
46
  developer: Optional[str] = None
47
+ org_id: Optional[str] = None
48
  family: Optional[str] = None
49
  architecture: Optional[str] = None
50
  params_billions: Optional[float] = None
51
+ parent_model_id: Optional[str] = None
52
  tags: Optional[list[str]] = None
53
  metadata: Optional[dict[str, Any]] = None
54
  review_status: Optional[str] = None
 
113
  review_status: Optional[str] = None
114
 
115
 
116
+ # --- Orgs ---
117
+
118
+ class OrgCreate(BaseModel):
119
+ id: str
120
+ display_name: str
121
+ parent_org_id: Optional[str] = None
122
+ website: Optional[str] = None
123
+ hf_org: Optional[str] = None
124
+ tags: list[str] = []
125
+ metadata: dict[str, Any] = {}
126
+ review_status: str = "draft"
127
+
128
+
129
+ class OrgPatch(BaseModel):
130
+ display_name: Optional[str] = None
131
+ parent_org_id: Optional[str] = None
132
+ website: Optional[str] = None
133
+ hf_org: Optional[str] = None
134
+ tags: Optional[list[str]] = None
135
+ metadata: Optional[dict[str, Any]] = None
136
+ review_status: Optional[str] = None
137
+
138
+
139
  # --- Aliases ---
140
 
141
  class AliasPatch(BaseModel):
src/eval_card_registry/cli.py CHANGED
@@ -6,14 +6,28 @@ Commands:
6
  stats Print registry summary
7
  sync Batch sync one or all EEE configs → eval_results table
8
  """
 
9
  from pathlib import Path
10
  from typing import Optional
11
 
12
  import typer
13
  import yaml
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  from eval_card_registry.store.hf_store import get_store
16
- from eval_card_registry.store import queries
17
  from eval_card_registry.store.queries import _is_na
18
 
19
  app = typer.Typer(help="eval-card-registry CLI")
@@ -34,6 +48,11 @@ def _load_store():
34
  def seed(
35
  local: bool = typer.Option(False, "--local", help="Write to fixtures/ instead of HF Hub"),
36
  seed_dir: str = typer.Option("./seed", "--seed-dir"),
 
 
 
 
 
37
  ):
38
  """Load known canonical entities from seed YAML files."""
39
  import os
@@ -43,11 +62,171 @@ def seed(
43
  store = _load_store()
44
  seed_path = Path(seed_dir)
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  # table name, yaml file, label, entity_type (for alias creation)
47
  seed_specs = [
 
48
  ("canonical_benchmarks", seed_path / "benchmarks.yaml", "benchmarks", "benchmark"),
49
  ("canonical_metrics", seed_path / "metrics.yaml", "metrics", "metric"),
50
  ("eval_harnesses", seed_path / "harnesses.yaml", "harnesses", "harness"),
 
 
 
 
51
  ]
52
 
53
  alias_count = 0
@@ -55,23 +234,44 @@ def seed(
55
  # Alias key: (raw_value, entity_type, canonical_id, source_config)
56
  seed_snapshot: list[tuple[str, str, set[str], set[tuple[str, str, str, Optional[str]]]]] = []
57
 
 
 
 
 
 
58
  for table, yaml_file, label, entity_type in seed_specs:
59
- if not yaml_file.exists():
60
- typer.echo(f" [skip] {yaml_file} not found")
61
- continue
62
- with open(yaml_file) as f:
63
- items = yaml.safe_load(f) or []
 
 
 
 
 
 
 
64
 
65
  yaml_ids: set[str] = set()
66
  yaml_alias_keys: set[tuple[str, str, str, Optional[str]]] = set()
67
 
68
- for item in items:
 
69
  # Pop 'aliases' / 'scoped_aliases' before upserting — not table columns.
70
  extra_aliases = item.pop("aliases", []) or []
71
  scoped_aliases = item.pop("scoped_aliases", {}) or {}
72
- queries.upsert_entity(store, table, item)
73
- canonical_id = item["id"]
74
- display_name = item.get("display_name", "")
 
 
 
 
 
 
 
 
75
  yaml_ids.add(canonical_id)
76
 
77
  # Global aliases (source_config=None): matched regardless of caller's source_config.
@@ -102,63 +302,129 @@ def seed(
102
  "strategy": "seed",
103
  "confidence": 1.0,
104
  "notes": None,
105
- })
106
  alias_count += 1
107
  except ValueError:
108
- pass # alias already exists (e.g. re-seeding)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
  seed_snapshot.append((table, entity_type, yaml_ids, yaml_alias_keys))
111
  typer.echo(f" {label}: {len(items)}")
112
 
113
- # Remove seed-originated entities and aliases that are no longer in the YAML.
114
- # Only touches rows that were created by seed (strategy == "seed"), never
115
- # sync-created aliases or auto-draft entities.
 
 
116
  removed_entities = 0
117
  removed_aliases = 0
118
- for table, entity_type, yaml_ids, yaml_alias_keys in seed_snapshot:
119
- # Remove stale seed aliases for this entity type
120
- aliases_df = store.table("aliases")
121
- seed_mask = (aliases_df["strategy"] == "seed") & (aliases_df["entity_type"] == entity_type)
122
- if seed_mask.any():
123
- seed_aliases = aliases_df[seed_mask]
124
- stale_alias_mask = seed_mask.copy()
125
- for idx in seed_aliases.index:
126
- row = seed_aliases.loc[idx]
127
- sc = row.get("source_config")
128
- if _is_na(sc):
129
- sc = None
130
- key = (row["raw_value"], row["entity_type"], row["canonical_id"], sc)
131
- if key in yaml_alias_keys:
132
- stale_alias_mask[idx] = False
133
- n_stale = stale_alias_mask.sum()
134
- if n_stale > 0:
135
- store.set_table("aliases", aliases_df[~stale_alias_mask].reset_index(drop=True))
136
- removed_aliases += int(n_stale)
137
-
138
- # Remove stale seed entities — only those with review_status "reviewed"
139
- # that came from seed and are no longer in the YAML.
140
- entity_df = store.table(table)
141
- if len(entity_df) > 0:
142
- stale = entity_df["id"].isin(yaml_ids)
143
- stale_entities = entity_df[~stale & (entity_df["review_status"] == "reviewed")]
144
- # Only remove if every alias for this entity is also seed-originated,
145
- # meaning it wasn't referenced by sync data.
146
- current_aliases = store.table("aliases")
147
- for eid in stale_entities["id"]:
148
- entity_aliases = current_aliases[
149
- (current_aliases["canonical_id"] == eid)
150
- & (current_aliases["entity_type"] == entity_type)
151
- ]
152
- if len(entity_aliases) == 0 or (entity_aliases["strategy"] == "seed").all():
153
- entity_df = entity_df[entity_df["id"] != eid]
154
- # Also remove any remaining aliases pointing to it
155
- current_aliases = current_aliases[
156
- ~((current_aliases["canonical_id"] == eid)
157
- & (current_aliases["entity_type"] == entity_type))
158
  ]
159
- removed_entities += 1
160
- store.set_table(table, entity_df.reset_index(drop=True))
161
- store.set_table("aliases", current_aliases.reset_index(drop=True))
 
 
 
 
 
 
 
162
 
163
  typer.echo(f" aliases: {alias_count} added, {removed_aliases} removed")
164
  if removed_entities:
 
6
  stats Print registry summary
7
  sync Batch sync one or all EEE configs → eval_results table
8
  """
9
+ import json
10
  from pathlib import Path
11
  from typing import Optional
12
 
13
  import typer
14
  import yaml
15
 
16
+
17
+ def _json_encode_if_needed(value):
18
+ """Encode lists/dicts as JSON strings; pass through anything else.
19
+
20
+ seed/models.yaml uses YAML-native lists for `tags` (e.g. `["open-weight"]`)
21
+ while seed/benchmarks.yaml stores them pre-encoded as strings (e.g.
22
+ `'["instruction-following"]'`). The canonical_* parquet columns are all
23
+ VARCHAR, so we coerce on the way in to keep both formats supported.
24
+ """
25
+ if isinstance(value, (list, dict)):
26
+ return json.dumps(value)
27
+ return value
28
+
29
  from eval_card_registry.store.hf_store import get_store
30
+ from eval_card_registry.store import queries, schemas
31
  from eval_card_registry.store.queries import _is_na
32
 
33
  app = typer.Typer(help="eval-card-registry CLI")
 
48
  def seed(
49
  local: bool = typer.Option(False, "--local", help="Write to fixtures/ instead of HF Hub"),
50
  seed_dir: str = typer.Option("./seed", "--seed-dir"),
51
+ prune_stale: bool = typer.Option(
52
+ False,
53
+ "--prune-stale/--no-prune-stale",
54
+ help="Remove reviewed seed entities and seed aliases absent from the current YAML snapshot.",
55
+ ),
56
  ):
57
  """Load known canonical entities from seed YAML files."""
58
  import os
 
62
  store = _load_store()
63
  seed_path = Path(seed_dir)
64
 
65
+ # ------------------------------------------------------------------
66
+ # Models — three-layer load from seed/models/:
67
+ # sources/*.generated.yaml → external catalog data (e.g. models.dev),
68
+ # flat lists, never hand-edited
69
+ # core.yaml → curated canonicals (the source of truth),
70
+ # flat list OR {skip_ids, entries} dict
71
+ # enrichments/aliases.yaml → optional alias-only entries ({id, aliases})
72
+ # that union onto whatever exists
73
+ #
74
+ # Merge order: sources → core → enrichments. Field-level merge per entry
75
+ # (aliases / tags UNION; other scalars prefer non-empty, last-write-wins).
76
+ # `skip_ids` from core drops generated entries we don't want.
77
+ # ------------------------------------------------------------------
78
def _load_models_merged() -> list[dict]:
    """Load and merge model seed entries from the layers under ``seed_path / "models"``.

    ``seed_path`` is read from the enclosing ``seed`` command's scope.

    Layers, absorbed in this order:
      1. ``sources/*.generated.yaml`` - flat lists, read in sorted filename order.
      2. ``core.yaml`` - a flat list, or a mapping with ``entries`` and the
         optional ``skip_ids`` / ``skip_source_ids`` lists.
      3. ``enrichments/aliases.yaml`` - a flat list.

    Entries sharing an ``id`` are merged field by field (see ``_merge_into``).
    ``skip_ids`` drops an id from every layer; ``skip_source_ids`` drops it
    from sources and enrichments only, so the core entry stays authoritative.

    Returns:
        The merged entries, ordered by the first appearance of each id.

    Raises:
        typer.BadParameter: If a file has an unexpected top-level shape, or an
            entry is not a mapping with an ``id`` key.
    """
    models_dir = seed_path / "models"
    sources_dir = models_dir / "sources"
    core_file = models_dir / "core.yaml"
    enrichments_file = models_dir / "enrichments" / "aliases.yaml"

    def _read_yaml(path, default):
        """Parse one YAML file, substituting ``default`` for an empty document."""
        # Seed files are UTF-8; do not depend on the platform's locale encoding.
        with open(path, encoding="utf-8") as f:
            return yaml.safe_load(f) or default

    def _read_flat_list(path) -> list:
        """Parse a YAML file whose top level must be a list."""
        loaded = _read_yaml(path, [])
        if not isinstance(loaded, list):
            raise typer.BadParameter(f"{path} must be a flat list")
        return loaded

    source_entries: list[dict] = []
    if sources_dir.is_dir():
        for src_path in sorted(sources_dir.glob("*.generated.yaml")):
            source_entries.extend(_read_flat_list(src_path))

    core_entries: list[dict] = []
    skip_ids: set[str] = set()
    skip_source_ids: set[str] = set()
    if core_file.exists():
        loaded = _read_yaml(core_file, {})
        if isinstance(loaded, list):
            core_entries = loaded
        elif isinstance(loaded, dict):
            core_entries = loaded.get("entries", []) or []
            skip_ids = set(loaded.get("skip_ids", []) or [])
            # `skip_source_ids` drops these ids from sources/enrichments only,
            # leaving core entries authoritative. Used when an auto-generated
            # source ships bad aliases for a model that core.yaml curates
            # correctly - otherwise the UNION-merge below would re-introduce
            # the bad aliases on every refresh.
            skip_source_ids = set(loaded.get("skip_source_ids", []) or [])
        else:
            raise typer.BadParameter(f"{core_file} unexpected shape {type(loaded)}")

    enrichment_entries: list[dict] = []
    if enrichments_file.exists():
        enrichment_entries = _read_flat_list(enrichments_file)

    def _lc(value) -> str:
        """Case-insensitive dedup key; str() so non-string YAML scalars work."""
        return str(value).lower()

    def _decode_list_field(v):
        """Best-effort decode of a list field that may be a YAML list or a
        JSON-encoded string.

        Returns ``(items, was_json)``; ``was_json`` tells the caller to
        re-encode the merged result as a JSON string.
        """
        if v is None:
            return [], False
        if isinstance(v, list):
            return list(v), False
        if isinstance(v, str):
            s = v.strip()
            if not s or s in ("[]", "null"):
                return [], True
            try:
                decoded = json.loads(s)
            except (ValueError, TypeError):
                decoded = None
            if isinstance(decoded, list):
                return list(decoded), True
        # Anything else (plain string, scalar, dict) is kept as a single item.
        return [v], False

    def _is_empty(v) -> bool:
        """True for None, "", [], {}, and the default-looking '[]' / '{}'."""
        if v is None:
            return True
        if isinstance(v, (list, dict)) and len(v) == 0:
            return True
        return isinstance(v, str) and v.strip() in ("", "[]", "{}")

    def _merge_into(target: dict, src: dict) -> dict:
        """Merge two entries with the same canonical id into a new dict.

        - ``aliases`` / ``tags``: UNION with case-insensitive dedup, keeping
          the first-seen spelling and order.
        - Other fields: a non-empty value from ``src`` overwrites
          (last-write-wins); an empty one never clobbers an existing value.
        """
        merged_aliases = list(target.get("aliases") or [])
        seen_aliases = {_lc(a) for a in merged_aliases if a}
        for alias in src.get("aliases") or []:
            # _lc() rather than alias.lower(): an unquoted YAML alias such as
            # `4` or `3.5` is parsed as a number and has no .lower().
            if alias and _lc(alias) not in seen_aliases:
                merged_aliases.append(alias)
                seen_aliases.add(_lc(alias))

        tgt_tags, tgt_was_json = _decode_list_field(target.get("tags"))
        src_tags, src_was_json = _decode_list_field(src.get("tags"))
        seen_tags = {_lc(t) for t in tgt_tags}
        for tag in src_tags:
            if tag is not None and _lc(tag) not in seen_tags:
                tgt_tags.append(tag)
                seen_tags.add(_lc(tag))
        # Re-encode if either side was a JSON string, so the stored form stays
        # a string once any layer used the pre-encoded format.
        tags_merged = json.dumps(tgt_tags) if (tgt_was_json or src_was_json) else tgt_tags

        merged = dict(target)
        for k, v in src.items():
            if k in ("aliases", "tags") or _is_empty(v):
                continue  # list fields handled above; empties never overwrite
            merged[k] = v
        merged["aliases"] = merged_aliases
        merged["tags"] = tags_merged
        return merged

    by_id: dict[str, dict] = {}

    def _absorb(entries: list[dict], extra_skip: "set[str] | frozenset[str]" = frozenset()) -> None:
        """Fold one layer into ``by_id``, skipping ids in ``skip_ids | extra_skip``."""
        drop = skip_ids | extra_skip
        for e in entries:
            # Reject non-mapping entries explicitly: `"id" not in e` would be a
            # substring test on a stray string and a TypeError on None.
            if not isinstance(e, dict) or "id" not in e:
                raise typer.BadParameter(f"models seed entry missing id: {e!r}")
            entry_id = e["id"]
            if entry_id in drop:
                continue
            by_id[entry_id] = _merge_into(by_id[entry_id], e) if entry_id in by_id else e

    # Sources/enrichments respect both skip_ids and skip_source_ids;
    # core entries respect only skip_ids so curated overrides always apply.
    _absorb(source_entries, extra_skip=skip_source_ids)
    _absorb(core_entries)
    _absorb(enrichment_entries, extra_skip=skip_source_ids)
    return list(by_id.values())
219
+
220
  # table name, yaml file, label, entity_type (for alias creation)
221
  seed_specs = [
222
+ ("canonical_orgs", seed_path / "orgs.yaml", "orgs", "org"),
223
  ("canonical_benchmarks", seed_path / "benchmarks.yaml", "benchmarks", "benchmark"),
224
  ("canonical_metrics", seed_path / "metrics.yaml", "metrics", "metric"),
225
  ("eval_harnesses", seed_path / "harnesses.yaml", "harnesses", "harness"),
226
+ # Models: load via the merge helper; pass a sentinel path that
227
+ # signals the loop below to invoke _load_models_merged() instead of
228
+ # reading a single YAML file.
229
+ ("canonical_models", "__merged_models__", "models", "model"),
230
  ]
231
 
232
  alias_count = 0
 
234
  # Alias key: (raw_value, entity_type, canonical_id, source_config)
235
  seed_snapshot: list[tuple[str, str, set[str], set[tuple[str, str, str, Optional[str]]]]] = []
236
 
237
+ # Build the alias index once so add_alias collision checks are O(1) instead
238
+ # of O(N) DataFrame mask scans. Combined with buffered=True below, this
239
+ # avoids the O(N²) pd.concat-per-row cost on ~1k entities + ~13k aliases.
240
+ queries._rebuild_alias_index(store)
241
+
242
  for table, yaml_file, label, entity_type in seed_specs:
243
+ table_columns = set(schemas.empty(table).columns)
244
+ if yaml_file == "__merged_models__":
245
+ items = _load_models_merged()
246
+ if not items:
247
+ typer.echo(f" [skip] no model entries found in seed/models.yaml or _overrides/")
248
+ continue
249
+ else:
250
+ if not yaml_file.exists():
251
+ typer.echo(f" [skip] {yaml_file} not found")
252
+ continue
253
+ with open(yaml_file) as f:
254
+ items = yaml.safe_load(f) or []
255
 
256
  yaml_ids: set[str] = set()
257
  yaml_alias_keys: set[tuple[str, str, str, Optional[str]]] = set()
258
 
259
+ for original_item in items:
260
+ item = dict(original_item)
261
  # Pop 'aliases' / 'scoped_aliases' before upserting — not table columns.
262
  extra_aliases = item.pop("aliases", []) or []
263
  scoped_aliases = item.pop("scoped_aliases", {}) or {}
264
+ # Normalize tags / metadata: YAML may have native lists/dicts, but
265
+ # the canonical_* parquet columns are VARCHAR, so encode if needed.
266
+ for col in ("tags", "metadata"):
267
+ if col in item:
268
+ item[col] = _json_encode_if_needed(item[col])
269
+ entity_item = {k: v for k, v in item.items() if k in table_columns}
270
+ if "id" not in entity_item:
271
+ raise typer.BadParameter(f"{label} seed entry is missing required id: {original_item!r}")
272
+ queries.upsert_entity(store, table, entity_item, buffered=True)
273
+ canonical_id = entity_item["id"]
274
+ display_name = entity_item.get("display_name", "")
275
  yaml_ids.add(canonical_id)
276
 
277
  # Global aliases (source_config=None): matched regardless of caller's source_config.
 
302
  "strategy": "seed",
303
  "confidence": 1.0,
304
  "notes": None,
305
+ }, buffered=True)
306
  alias_count += 1
307
  except ValueError:
308
+ # add_alias raises on uniqueness collision: an alias row
309
+ # already exists for (entity_type, raw_value, source_config).
310
+ # YAML is the source of truth, so if the existing row points
311
+ # at a different canonical_id, this is a YAML rename and we
312
+ # must REPOINT the existing row — NOT silently swallow it.
313
+ # Without this, stale-removal at the end of seed would then
314
+ # delete the row (its old key is no longer in
315
+ # yaml_alias_keys), causing total alias loss.
316
+ aliases_df = store.table("aliases")
317
+ mask = (
318
+ (aliases_df["raw_value"] == raw_value)
319
+ & (aliases_df["entity_type"] == entity_type)
320
+ & (aliases_df["status"] != "rejected")
321
+ )
322
+ if source_cfg is not None:
323
+ mask = mask & (aliases_df["source_config"] == source_cfg)
324
+ else:
325
+ mask = mask & aliases_df["source_config"].isna()
326
+ existing = aliases_df[mask]
327
+ if existing.empty:
328
+ # Collision came from the pending buffer (this run added
329
+ # the same key earlier). For same-canonical re-adds this
330
+ # is a no-op; for different-canonical we must mutate the
331
+ # pending dict in place so the rename isn't lost on
332
+ # flush. _alias_index points at the same dict, so
333
+ # updating it here keeps the index consistent.
334
+ for p in queries._get_pending(store, "aliases"):
335
+ if (p.get("entity_type") == entity_type
336
+ and p.get("raw_value") == raw_value
337
+ and queries._source_config_key(p.get("source_config")) == queries._source_config_key(source_cfg)
338
+ and p.get("status") != "rejected"):
339
+ if p["canonical_id"] != canonical_id:
340
+ prev = p["canonical_id"]
341
+ p["canonical_id"] = canonical_id
342
+ p["source_field"] = "seed"
343
+ p["status"] = "confirmed"
344
+ p["strategy"] = "seed"
345
+ p["confidence"] = 1.0
346
+ typer.echo(
347
+ f" [rename] alias {raw_value!r} ({entity_type}) "
348
+ f"moved {prev!r} -> {canonical_id!r} (pending)"
349
+ )
350
+ alias_count += 1
351
+ break
352
+ continue
353
+ row = existing.iloc[0]
354
+ if row["canonical_id"] != canonical_id:
355
+ # Rename: repoint the existing row at the new canonical.
356
+ queries.update_alias(store, row["id"], {
357
+ "canonical_id": canonical_id,
358
+ "source_field": "seed",
359
+ "status": "confirmed",
360
+ "strategy": "seed",
361
+ "confidence": 1.0,
362
+ })
363
+ typer.echo(
364
+ f" [rename] alias {raw_value!r} ({entity_type}) "
365
+ f"moved {row['canonical_id']!r} -> {canonical_id!r}"
366
+ )
367
+ alias_count += 1
368
+ # else: identical re-seed of an existing alias — no-op.
369
 
370
  seed_snapshot.append((table, entity_type, yaml_ids, yaml_alias_keys))
371
  typer.echo(f" {label}: {len(items)}")
372
 
373
+ # Flush all buffered upserts (entities + aliases) into their tables in a
374
+ # single pd.concat per table. prune_stale below reads store.table(...)
375
+ # directly, so this must happen before that block.
376
+ queries.flush_pending(store)
377
+
378
  removed_entities = 0
379
  removed_aliases = 0
380
+ if prune_stale:
381
+ # Remove seed-originated entities and aliases that are no longer in the YAML.
382
+ # Only touches rows that were created by seed (strategy == "seed"), never
383
+ # sync-created aliases or auto-draft entities.
384
+ for table, entity_type, yaml_ids, yaml_alias_keys in seed_snapshot:
385
+ # Remove stale seed aliases for this entity type.
386
+ aliases_df = store.table("aliases")
387
+ seed_mask = (aliases_df["strategy"] == "seed") & (aliases_df["entity_type"] == entity_type)
388
+ if seed_mask.any():
389
+ seed_aliases = aliases_df[seed_mask]
390
+ stale_alias_mask = seed_mask.copy()
391
+ for idx in seed_aliases.index:
392
+ row = seed_aliases.loc[idx]
393
+ sc = row.get("source_config")
394
+ if _is_na(sc):
395
+ sc = None
396
+ key = (row["raw_value"], row["entity_type"], row["canonical_id"], sc)
397
+ if key in yaml_alias_keys:
398
+ stale_alias_mask[idx] = False
399
+ n_stale = stale_alias_mask.sum()
400
+ if n_stale > 0:
401
+ store.set_table("aliases", aliases_df[~stale_alias_mask].reset_index(drop=True))
402
+ removed_aliases += int(n_stale)
403
+
404
+ # Remove stale seed entities — only those with review_status "reviewed"
405
+ # that came from seed and are no longer in the YAML.
406
+ entity_df = store.table(table)
407
+ if len(entity_df) > 0:
408
+ stale = entity_df["id"].isin(yaml_ids)
409
+ stale_entities = entity_df[~stale & (entity_df["review_status"] == "reviewed")]
410
+ # Only remove if every alias for this entity is also seed-originated,
411
+ # meaning it wasn't referenced by sync data.
412
+ current_aliases = store.table("aliases")
413
+ for eid in stale_entities["id"]:
414
+ entity_aliases = current_aliases[
415
+ (current_aliases["canonical_id"] == eid)
416
+ & (current_aliases["entity_type"] == entity_type)
 
 
 
417
  ]
418
+ if len(entity_aliases) == 0 or (entity_aliases["strategy"] == "seed").all():
419
+ entity_df = entity_df[entity_df["id"] != eid]
420
+ # Also remove any remaining aliases pointing to it.
421
+ current_aliases = current_aliases[
422
+ ~((current_aliases["canonical_id"] == eid)
423
+ & (current_aliases["entity_type"] == entity_type))
424
+ ]
425
+ removed_entities += 1
426
+ store.set_table(table, entity_df.reset_index(drop=True))
427
+ store.set_table("aliases", current_aliases.reset_index(drop=True))
428
 
429
  typer.echo(f" aliases: {alias_count} added, {removed_aliases} removed")
430
  if removed_entities:
src/eval_card_registry/main.py CHANGED
@@ -9,6 +9,7 @@ from eval_card_registry.services.log_writer import ResolveLogWriter
9
  from eval_card_registry.api.routes_resolve import router as resolve_router
10
  from eval_card_registry.api.routes_entities import router as entities_router
11
  from eval_card_registry.api.routes_aliases import router as aliases_router
 
12
  from eval_card_registry.api.routes_health import router as health_router
13
 
14
 
@@ -44,5 +45,6 @@ PREFIX = "/api/v1"
44
 
45
  app.include_router(resolve_router, prefix=PREFIX)
46
  app.include_router(entities_router, prefix=PREFIX)
 
47
  app.include_router(aliases_router, prefix=PREFIX)
48
  app.include_router(health_router, prefix=PREFIX)
 
9
  from eval_card_registry.api.routes_resolve import router as resolve_router
10
  from eval_card_registry.api.routes_entities import router as entities_router
11
  from eval_card_registry.api.routes_aliases import router as aliases_router
12
+ from eval_card_registry.api.routes_orgs import router as orgs_router
13
  from eval_card_registry.api.routes_health import router as health_router
14
 
15
 
 
45
 
46
  app.include_router(resolve_router, prefix=PREFIX)
47
  app.include_router(entities_router, prefix=PREFIX)
48
+ app.include_router(orgs_router, prefix=PREFIX)
49
  app.include_router(aliases_router, prefix=PREFIX)
50
  app.include_router(health_router, prefix=PREFIX)
src/eval_card_registry/services/__pycache__/resolution_service.cpython-314.pyc CHANGED
Binary files a/src/eval_card_registry/services/__pycache__/resolution_service.cpython-314.pyc and b/src/eval_card_registry/services/__pycache__/resolution_service.cpython-314.pyc differ
 
src/eval_card_registry/services/resolution_service.py CHANGED
@@ -28,6 +28,13 @@ _ENTITY_TABLE = {
28
  "benchmark": "canonical_benchmarks",
29
  "metric": "canonical_metrics",
30
  "harness": "eval_harnesses",
 
 
 
 
 
 
 
31
  }
32
 
33
 
@@ -62,6 +69,7 @@ def _no_match_result() -> dict:
62
  "confidence": 0.0,
63
  "created_new": False,
64
  "review_status": None,
 
65
  }
66
 
67
 
@@ -71,6 +79,7 @@ def _match_result(
71
  confidence: float,
72
  review_status: Optional[str],
73
  created_new: bool = False,
 
74
  ) -> dict:
75
  return {
76
  "canonical_id": canonical_id,
@@ -78,17 +87,30 @@ def _match_result(
78
  "confidence": confidence,
79
  "created_new": created_new,
80
  "review_status": review_status,
 
81
  }
82
 
83
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  class ResolutionService:
85
  def __init__(self, registry_store: RegistryStore) -> None:
86
  self.store = registry_store
87
  self._resolver: Optional[Resolver] = None
88
- # Cache: (raw_value, entity_type) → resolve result dict.
89
  # Avoids re-running the full strategy chain for duplicate strings
90
  # (e.g. "Accuracy" appears in every record).
91
- self._resolve_cache: dict[tuple[str, str], dict] = {}
92
 
93
  def _get_resolver(self) -> Resolver:
94
  if self._resolver is None:
@@ -133,6 +155,7 @@ class ResolutionService:
133
  result.strategy,
134
  result.confidence,
135
  entity.get("review_status") if entity else None,
 
136
  )
137
  else:
138
  result_dict = _no_match_result()
@@ -149,6 +172,7 @@ class ResolutionService:
149
  existing["strategy"],
150
  existing["confidence"],
151
  entity.get("review_status") if entity else None,
 
152
  )
153
  self._resolve_cache[cache_key] = result_dict
154
  return result_dict
@@ -236,6 +260,7 @@ class ResolutionService:
236
  result.confidence,
237
  entity.get("review_status") if entity else "draft",
238
  created_new=created_new,
 
239
  )
240
  self._resolve_cache[cache_key] = result_dict
241
  return result_dict
@@ -258,17 +283,36 @@ class ResolutionService:
258
  "updated_at": now,
259
  }
260
  if entity_type == "model":
261
- base.update({"developer": None, "family": None, "architecture": None, "params_billions": None, "tags": "[]"})
 
 
 
 
 
 
 
 
262
  elif entity_type == "benchmark":
263
  base.update({"description": None, "dataset_repo": None, "parent_benchmark_id": None, "tags": "[]"})
264
  elif entity_type == "metric":
265
  base.update({"score_type": None, "lower_is_better": False, "min_score": None, "max_score": None})
266
  elif entity_type == "harness":
267
  base.update({"version": None, "fork_url": None})
 
 
268
 
269
  queries.upsert_entity(self.store, table, base, buffered=True)
270
  return candidate_id
271
 
 
 
 
 
 
 
 
 
 
272
  def _find_alias_id(
273
  self,
274
  raw_value: str,
 
28
  "benchmark": "canonical_benchmarks",
29
  "metric": "canonical_metrics",
30
  "harness": "eval_harnesses",
31
+ "org": "canonical_orgs",
32
+ }
33
+
34
+ _PARENT_FIELD = {
35
+ "model": "parent_model_id",
36
+ "benchmark": "parent_benchmark_id",
37
+ "org": "parent_org_id",
38
  }
39
 
40
 
 
69
  "confidence": 0.0,
70
  "created_new": False,
71
  "review_status": None,
72
+ "parent_canonical_id": None,
73
  }
74
 
75
 
 
79
  confidence: float,
80
  review_status: Optional[str],
81
  created_new: bool = False,
82
+ parent_canonical_id: Optional[str] = None,
83
  ) -> dict:
84
  return {
85
  "canonical_id": canonical_id,
 
87
  "confidence": confidence,
88
  "created_new": created_new,
89
  "review_status": review_status,
90
+ "parent_canonical_id": parent_canonical_id,
91
  }
92
 
93
 
94
def _parent_canonical_id(entity_type: str, entity: Optional[dict]) -> Optional[str]:
    """Return the parent canonical id stored on ``entity``, or ``None``.

    The column to read is looked up in ``_PARENT_FIELD`` by ``entity_type``.
    ``None`` is returned when there is no entity, the entity type has no
    parent column, or the stored value is NA or otherwise falsy.
    """
    parent_field = _PARENT_FIELD.get(entity_type) if entity else None
    if not parent_field:
        return None
    parent_id = entity.get(parent_field)
    # Check NA first: pandas NA values must not reach a truthiness test.
    return None if queries._is_na(parent_id) else (parent_id or None)
104
+
105
+
106
  class ResolutionService:
107
  def __init__(self, registry_store: RegistryStore) -> None:
108
  self.store = registry_store
109
  self._resolver: Optional[Resolver] = None
110
+ # Cache: (raw_value, entity_type, source_config) → resolve result dict.
111
  # Avoids re-running the full strategy chain for duplicate strings
112
  # (e.g. "Accuracy" appears in every record).
113
+ self._resolve_cache: dict[tuple[str, str, Optional[str]], dict] = {}
114
 
115
  def _get_resolver(self) -> Resolver:
116
  if self._resolver is None:
 
155
  result.strategy,
156
  result.confidence,
157
  entity.get("review_status") if entity else None,
158
+ parent_canonical_id=_parent_canonical_id(entity_type, entity),
159
  )
160
  else:
161
  result_dict = _no_match_result()
 
172
  existing["strategy"],
173
  existing["confidence"],
174
  entity.get("review_status") if entity else None,
175
+ parent_canonical_id=_parent_canonical_id(entity_type, entity),
176
  )
177
  self._resolve_cache[cache_key] = result_dict
178
  return result_dict
 
260
  result.confidence,
261
  entity.get("review_status") if entity else "draft",
262
  created_new=created_new,
263
+ parent_canonical_id=_parent_canonical_id(entity_type, entity),
264
  )
265
  self._resolve_cache[cache_key] = result_dict
266
  return result_dict
 
283
  "updated_at": now,
284
  }
285
  if entity_type == "model":
286
+ base.update({
287
+ "developer": None,
288
+ "org_id": self._resolve_model_org_id(raw_value),
289
+ "family": None,
290
+ "architecture": None,
291
+ "params_billions": None,
292
+ "parent_model_id": None,
293
+ "tags": "[]",
294
+ })
295
  elif entity_type == "benchmark":
296
  base.update({"description": None, "dataset_repo": None, "parent_benchmark_id": None, "tags": "[]"})
297
  elif entity_type == "metric":
298
  base.update({"score_type": None, "lower_is_better": False, "min_score": None, "max_score": None})
299
  elif entity_type == "harness":
300
  base.update({"version": None, "fork_url": None})
301
+ elif entity_type == "org":
302
+ base.update({"parent_org_id": None, "website": None, "hf_org": None, "tags": "[]"})
303
 
304
  queries.upsert_entity(self.store, table, base, buffered=True)
305
  return candidate_id
306
 
307
+ def _resolve_model_org_id(self, raw_value: str) -> Optional[str]:
308
+ if "/" not in raw_value:
309
+ return None
310
+ raw_org = raw_value.split("/", 1)[0].strip()
311
+ if not raw_org:
312
+ return None
313
+ result = self._get_resolver().resolve(raw_org, "org", None)
314
+ return result.canonical_id
315
+
316
  def _find_alias_id(
317
  self,
318
  raw_value: str,
src/eval_card_registry/store/__pycache__/hf_store.cpython-314.pyc CHANGED
Binary files a/src/eval_card_registry/store/__pycache__/hf_store.cpython-314.pyc and b/src/eval_card_registry/store/__pycache__/hf_store.cpython-314.pyc differ
 
src/eval_card_registry/store/__pycache__/queries.cpython-314.pyc CHANGED
Binary files a/src/eval_card_registry/store/__pycache__/queries.cpython-314.pyc and b/src/eval_card_registry/store/__pycache__/queries.cpython-314.pyc differ
 
src/eval_card_registry/store/__pycache__/schemas.cpython-314.pyc CHANGED
Binary files a/src/eval_card_registry/store/__pycache__/schemas.cpython-314.pyc and b/src/eval_card_registry/store/__pycache__/schemas.cpython-314.pyc differ
 
src/eval_card_registry/store/hf_store.py CHANGED
@@ -28,6 +28,7 @@ def _fixtures_path() -> Path:
28
 
29
 
30
  TABLE_NAMES = [
 
31
  "canonical_models",
32
  "canonical_benchmarks",
33
  "canonical_metrics",
@@ -40,6 +41,7 @@ TABLE_NAMES = [
40
 
41
  # Tables needed for query-only (read-only) mode
42
  QUERY_TABLE_NAMES = [
 
43
  "canonical_models",
44
  "canonical_benchmarks",
45
  "canonical_metrics",
 
28
 
29
 
30
  TABLE_NAMES = [
31
+ "canonical_orgs",
32
  "canonical_models",
33
  "canonical_benchmarks",
34
  "canonical_metrics",
 
41
 
42
  # Tables needed for query-only (read-only) mode
43
  QUERY_TABLE_NAMES = [
44
+ "canonical_orgs",
45
  "canonical_models",
46
  "canonical_benchmarks",
47
  "canonical_metrics",
src/eval_card_registry/store/queries.py CHANGED
@@ -32,6 +32,11 @@ def _is_na(value) -> bool:
32
  return False
33
 
34
 
 
 
 
 
 
35
  def _row_to_dict(row: pd.Series) -> dict:
36
  """Convert a Series to dict, coercing pandas NA/NaN/NaT to None for JSON.
37
  Uses Series.to_dict() so numpy scalars are unboxed to Python types."""
@@ -154,13 +159,22 @@ def _rebuild_alias_index(store: RegistryStore) -> None:
154
  _alias_index = {}
155
  df = store.table("aliases")
156
  for _, row in df.iterrows():
157
- key = (row["entity_type"], row["raw_value"], row.get("source_config"))
158
  if row.get("status") != "rejected":
159
- _alias_index[key] = row.to_dict()
 
 
 
 
 
 
160
  # Also index pending aliases
161
  for pending_row in _get_pending(store, "aliases"):
162
  if pending_row.get("status") != "rejected":
163
- key = (pending_row["entity_type"], pending_row["raw_value"], pending_row.get("source_config"))
 
 
 
 
164
  _alias_index[key] = pending_row
165
 
166
 
@@ -170,6 +184,7 @@ def get_alias(
170
  entity_type: str,
171
  source_config: Optional[str],
172
  ) -> Optional[dict]:
 
173
  # Fast path: use index if available
174
  if _alias_index:
175
  if source_config:
@@ -208,7 +223,7 @@ def add_alias(store: RegistryStore, data: dict, buffered: bool = False) -> dict:
208
  """
209
  raw_value = data["raw_value"]
210
  entity_type = data["entity_type"]
211
- source_config = data.get("source_config")
212
  key = (entity_type, raw_value, source_config)
213
 
214
  # Check uniqueness via index if available
@@ -238,14 +253,21 @@ def add_alias(store: RegistryStore, data: dict, buffered: bool = False) -> dict:
238
  # Check pending buffer
239
  for p in _get_pending(store, "aliases"):
240
  if (p["entity_type"] == entity_type and p["raw_value"] == raw_value
241
- and p.get("source_config") == source_config and p.get("status") != "rejected"):
 
242
  raise ValueError(
243
  f"Alias already exists for ({entity_type!r}, {raw_value!r}, source_config={source_config!r}). "
244
  "Use update_alias() to modify an existing alias."
245
  )
246
 
247
  now = _now()
248
- row = {**data, "id": str(uuid.uuid4()), "created_at": now, "updated_at": now}
 
 
 
 
 
 
249
 
250
  if buffered:
251
  _get_pending(store, "aliases").append(row)
@@ -254,8 +276,9 @@ def add_alias(store: RegistryStore, data: dict, buffered: bool = False) -> dict:
254
  df = pd.concat([df, pd.DataFrame([row])], ignore_index=True)
255
  store.set_table("aliases", df)
256
 
257
- # Update index (only non-rejected aliases block future inserts)
258
- if row.get("status") != "rejected":
 
259
  _alias_index[key] = row
260
  return row
261
 
@@ -269,7 +292,20 @@ def update_alias(store: RegistryStore, alias_id: str, updates: dict) -> Optional
269
  df.loc[df["id"] == alias_id, col] = val
270
  df.loc[df["id"] == alias_id, "updated_at"] = _now()
271
  store.set_table("aliases", df)
272
- return _row_to_dict(df[df["id"] == alias_id].iloc[0])
 
 
 
 
 
 
 
 
 
 
 
 
 
273
 
274
 
275
  # ------------------------------------------------------------------
 
32
  return False
33
 
34
 
35
+ def _source_config_key(value) -> Optional[str]:
36
+ """Normalize nullable source_config values for alias-index keys."""
37
+ return None if _is_na(value) else value
38
+
39
+
40
  def _row_to_dict(row: pd.Series) -> dict:
41
  """Convert a Series to dict, coercing pandas NA/NaN/NaT to None for JSON.
42
  Uses Series.to_dict() so numpy scalars are unboxed to Python types."""
 
159
  _alias_index = {}
160
  df = store.table("aliases")
161
  for _, row in df.iterrows():
 
162
  if row.get("status") != "rejected":
163
+ row_dict = _row_to_dict(row)
164
+ key = (
165
+ row_dict["entity_type"],
166
+ row_dict["raw_value"],
167
+ _source_config_key(row_dict.get("source_config")),
168
+ )
169
+ _alias_index[key] = row_dict
170
  # Also index pending aliases
171
  for pending_row in _get_pending(store, "aliases"):
172
  if pending_row.get("status") != "rejected":
173
+ key = (
174
+ pending_row["entity_type"],
175
+ pending_row["raw_value"],
176
+ _source_config_key(pending_row.get("source_config")),
177
+ )
178
  _alias_index[key] = pending_row
179
 
180
 
 
184
  entity_type: str,
185
  source_config: Optional[str],
186
  ) -> Optional[dict]:
187
+ source_config = _source_config_key(source_config)
188
  # Fast path: use index if available
189
  if _alias_index:
190
  if source_config:
 
223
  """
224
  raw_value = data["raw_value"]
225
  entity_type = data["entity_type"]
226
+ source_config = _source_config_key(data.get("source_config"))
227
  key = (entity_type, raw_value, source_config)
228
 
229
  # Check uniqueness via index if available
 
253
  # Check pending buffer
254
  for p in _get_pending(store, "aliases"):
255
  if (p["entity_type"] == entity_type and p["raw_value"] == raw_value
256
+ and _source_config_key(p.get("source_config")) == source_config
257
+ and p.get("status") != "rejected"):
258
  raise ValueError(
259
  f"Alias already exists for ({entity_type!r}, {raw_value!r}, source_config={source_config!r}). "
260
  "Use update_alias() to modify an existing alias."
261
  )
262
 
263
  now = _now()
264
+ row = {
265
+ **data,
266
+ "source_config": source_config,
267
+ "id": str(uuid.uuid4()),
268
+ "created_at": now,
269
+ "updated_at": now,
270
+ }
271
 
272
  if buffered:
273
  _get_pending(store, "aliases").append(row)
 
276
  df = pd.concat([df, pd.DataFrame([row])], ignore_index=True)
277
  store.set_table("aliases", df)
278
 
279
+ # Update index only if it has already been built. If it is empty, get_alias
280
+ # should keep using the DataFrame/pending slow path instead of a partial index.
281
+ if _alias_index and row.get("status") != "rejected":
282
  _alias_index[key] = row
283
  return row
284
 
 
292
  df.loc[df["id"] == alias_id, col] = val
293
  df.loc[df["id"] == alias_id, "updated_at"] = _now()
294
  store.set_table("aliases", df)
295
+ updated = _row_to_dict(df[df["id"] == alias_id].iloc[0])
296
+ # Keep the in-memory index in sync if it was built — otherwise a follow-up
297
+ # add_alias() / get_alias() would see stale canonical data for this key.
298
+ if _alias_index:
299
+ key = (
300
+ updated["entity_type"],
301
+ updated["raw_value"],
302
+ _source_config_key(updated.get("source_config")),
303
+ )
304
+ if updated.get("status") != "rejected":
305
+ _alias_index[key] = updated
306
+ else:
307
+ _alias_index.pop(key, None)
308
+ return updated
309
 
310
 
311
  # ------------------------------------------------------------------
src/eval_card_registry/store/schemas.py CHANGED
@@ -3,13 +3,27 @@ import pandas as pd
3
 
4
 
5
  _SCHEMAS: dict[str, dict] = {
 
 
 
 
 
 
 
 
 
 
 
 
6
  "canonical_models": {
7
  "id": pd.StringDtype(),
8
  "display_name": pd.StringDtype(),
9
  "developer": pd.StringDtype(),
 
10
  "family": pd.StringDtype(),
11
  "architecture": pd.StringDtype(),
12
  "params_billions": "float64",
 
13
  "tags": pd.StringDtype(), # JSON-encoded list
14
  "metadata": pd.StringDtype(), # JSON-encoded dict
15
  "review_status": pd.StringDtype(),
 
3
 
4
 
5
  _SCHEMAS: dict[str, dict] = {
6
+ "canonical_orgs": {
7
+ "id": pd.StringDtype(),
8
+ "display_name": pd.StringDtype(),
9
+ "parent_org_id": pd.StringDtype(),
10
+ "website": pd.StringDtype(),
11
+ "hf_org": pd.StringDtype(),
12
+ "tags": pd.StringDtype(), # JSON-encoded list
13
+ "metadata": pd.StringDtype(), # JSON-encoded dict
14
+ "review_status": pd.StringDtype(),
15
+ "created_at": pd.StringDtype(),
16
+ "updated_at": pd.StringDtype(),
17
+ },
18
  "canonical_models": {
19
  "id": pd.StringDtype(),
20
  "display_name": pd.StringDtype(),
21
  "developer": pd.StringDtype(),
22
+ "org_id": pd.StringDtype(),
23
  "family": pd.StringDtype(),
24
  "architecture": pd.StringDtype(),
25
  "params_billions": "float64",
26
+ "parent_model_id": pd.StringDtype(),
27
  "tags": pd.StringDtype(), # JSON-encoded list
28
  "metadata": pd.StringDtype(), # JSON-encoded dict
29
  "review_status": pd.StringDtype(),