Spaces:

evaleval
/

entity-registry

Running

App Files Files Community

j-chim committed 26 days ago

Commit

e70d416

verified ·

1 Parent(s): 5922ac4

Upload folder using huggingface_hub

Browse files

Files changed (21) hide show

packages/eval-entity-resolver/src/eval_entity_resolver/__pycache__/alias_store.cpython-314.pyc +0 -0
packages/eval-entity-resolver/src/eval_entity_resolver/alias_store.py +61 -3
packages/eval-entity-resolver/src/eval_entity_resolver/strategies/__pycache__/fuzzy.cpython-314.pyc +0 -0
packages/eval-entity-resolver/src/eval_entity_resolver/strategies/fuzzy.py +192 -2
src/eval_card_registry/__pycache__/cli.cpython-314.pyc +0 -0
src/eval_card_registry/__pycache__/main.cpython-314.pyc +0 -0
src/eval_card_registry/api/__pycache__/routes_entities.cpython-314.pyc +0 -0
src/eval_card_registry/api/__pycache__/routes_orgs.cpython-314.pyc +0 -0
src/eval_card_registry/api/__pycache__/schemas.cpython-314.pyc +0 -0
src/eval_card_registry/api/routes_entities.py +28 -4
src/eval_card_registry/api/schemas.py +29 -1
src/eval_card_registry/cli.py +324 -58
src/eval_card_registry/main.py +2 -0
src/eval_card_registry/services/__pycache__/resolution_service.cpython-314.pyc +0 -0
src/eval_card_registry/services/resolution_service.py +47 -3
src/eval_card_registry/store/__pycache__/hf_store.cpython-314.pyc +0 -0
src/eval_card_registry/store/__pycache__/queries.cpython-314.pyc +0 -0
src/eval_card_registry/store/__pycache__/schemas.cpython-314.pyc +0 -0
src/eval_card_registry/store/hf_store.py +2 -0
src/eval_card_registry/store/queries.py +45 -9
src/eval_card_registry/store/schemas.py +14 -0

packages/eval-entity-resolver/src/eval_entity_resolver/__pycache__/alias_store.cpython-314.pyc CHANGED Viewed

Binary files a/packages/eval-entity-resolver/src/eval_entity_resolver/__pycache__/alias_store.cpython-314.pyc and b/packages/eval-entity-resolver/src/eval_entity_resolver/__pycache__/alias_store.cpython-314.pyc differ

packages/eval-entity-resolver/src/eval_entity_resolver/alias_store.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from __future__ import annotations
 import uuid
 from datetime import datetime, timezone
 from pathlib import Path
@@ -7,6 +8,8 @@ from typing import Optional
 import pandas as pd
 _SCHEMA = {
     "id": pd.StringDtype(),
@@ -71,15 +74,41 @@ class AliasStore:
     @classmethod
     def from_parquet(cls, path: str | Path, read_only: bool = False) -> "AliasStore":
         p = Path(path) / "aliases.parquet"
-        if p.exists():
             df = pd.read_parquet(p)
-        else:
             df = _empty_df()
         return cls(df, read_only=read_only)
     @classmethod
     def from_hf(cls, repo_id: str, read_only: bool = False) -> "AliasStore":
         from huggingface_hub import hf_hub_download
         try:
             local = hf_hub_download(
@@ -88,7 +117,36 @@ class AliasStore:
                 repo_type="dataset",
             )
             df = pd.read_parquet(local)
-        except Exception:
             df = _empty_df()
         return cls(df, read_only=read_only)

 from __future__ import annotations
+import logging
 import uuid
 from datetime import datetime, timezone
 from pathlib import Path
 import pandas as pd
+logger = logging.getLogger(__name__)
 _SCHEMA = {
     "id": pd.StringDtype(),
     @classmethod
     def from_parquet(cls, path: str | Path, read_only: bool = False) -> "AliasStore":
         p = Path(path) / "aliases.parquet"
+        if not p.exists():
+            # Missing dir / missing file is the legitimate "fresh store"
+            # case (used by tests and first-time seed runs), so log at INFO
+            # instead of WARNING — but still surface it.
+            logger.info(
+                "AliasStore.from_parquet: %s not found; falling back to empty store",
+                p,
+            )
+            return cls(_empty_df(), read_only=read_only)
+        try:
             df = pd.read_parquet(p)
+        except (OSError, ValueError) as exc:
+            # OSError covers permission / IO errors; ValueError is what
+            # pyarrow raises for corrupt parquet (ArrowInvalid is a
+            # ValueError subclass). Keep the empty-store fallback so a
+            # corrupt local cache doesn't hard-crash callers, but log so
+            # the failure isn't silent.
+            logger.warning(
+                "AliasStore.from_parquet: failed to read %s (%s: %s); "
+                "falling back to empty store",
+                p,
+                type(exc).__name__,
+                exc,
+            )
             df = _empty_df()
         return cls(df, read_only=read_only)
     @classmethod
     def from_hf(cls, repo_id: str, read_only: bool = False) -> "AliasStore":
         from huggingface_hub import hf_hub_download
+        from huggingface_hub.errors import (
+            EntryNotFoundError,
+            HfHubHTTPError,
+            RepositoryNotFoundError,
+        )
         try:
             local = hf_hub_download(
                 repo_type="dataset",
             )
             df = pd.read_parquet(local)
+        except (
+            RepositoryNotFoundError,
+            EntryNotFoundError,
+            HfHubHTTPError,
+            FileNotFoundError,
+            OSError,
+            ValueError,
+        ) as exc:
+            # Specific catches:
+            #   - RepositoryNotFoundError: repo missing or auth failure
+            #     (HF returns 401 disguised as 404 when token is invalid).
+            #   - EntryNotFoundError: repo exists but aliases/part-0.parquet
+            #     hasn't been seeded yet.
+            #   - HfHubHTTPError: catch-all for other HTTP failures
+            #     (network errors, 5xx, rate limits).
+            #   - FileNotFoundError / OSError: filesystem-level errors
+            #     reading the downloaded file.
+            #   - ValueError: pyarrow.lib.ArrowInvalid (parquet corruption)
+            #     subclasses ValueError.
+            # We keep the fallback-to-empty recovery (callers expect the
+            # store to construct), but emit a warning so the failure is
+            # visible — silent fallback was masking auth and corruption
+            # issues during deploys.
+            logger.warning(
+                "AliasStore.from_hf: failed to load aliases from %r (%s: %s); "
+                "falling back to empty store",
+                repo_id,
+                type(exc).__name__,
+                exc,
+            )
             df = _empty_df()
         return cls(df, read_only=read_only)

packages/eval-entity-resolver/src/eval_entity_resolver/strategies/__pycache__/fuzzy.cpython-314.pyc CHANGED Viewed

Binary files a/packages/eval-entity-resolver/src/eval_entity_resolver/strategies/__pycache__/fuzzy.cpython-314.pyc and b/packages/eval-entity-resolver/src/eval_entity_resolver/strategies/__pycache__/fuzzy.cpython-314.pyc differ

packages/eval-entity-resolver/src/eval_entity_resolver/strategies/fuzzy.py CHANGED Viewed

@@ -62,6 +62,20 @@ _STRIP_SUFFIX_PATTERNS: list[re.Pattern[str]] = [
 # Known org aliases: {variant_prefix: canonical_prefix}
 # Convention: simplify HF org names (e.g. "deepseek-ai" → "deepseek") to the
 # shorter form used as canonical in this registry.
 _ORG_ALIASES: dict[str, str] = {
     "deepseek-ai": "deepseek",
     "cohereforai": "cohere",
@@ -70,8 +84,152 @@ _ORG_ALIASES: dict[str, str] = {
     "meta-llama": "meta",
     "mistral-ai": "mistralai",
     "nvidia-nemo": "nvidia",
 }
 # Confidence assigned to stem-match results.  Below 1.0 (exact) and 0.95
 # (normalized) so the provenance is clear in the resolution log.
 _STEM_CONFIDENCE = 0.90
@@ -113,6 +271,13 @@ def fuzzy_match(
     Returns ``(canonical_id, confidence)``; canonical_id is None on no match.
     """
     candidates_to_try: list[str] = []
     # 1. Suffix stripping (may produce multiple stems: strip one, strip two, etc.)
@@ -124,13 +289,38 @@ def fuzzy_match(
         if double:
             candidates_to_try.append(double)
-    # 2. Org normalization — on both original and stripped forms
     for val in [raw_value] + candidates_to_try[:]:
         rewritten = _normalize_org(val)
         if rewritten:
             candidates_to_try.append(rewritten)
-    # 3. Check each candidate against exact then normalized lookups.
     # Scoped-aware: config-scoped aliases for ``source_config`` count as
     # candidates; unrelated scoped aliases are excluded.
     norm_lookup = alias_store.get_normalized_lookup(entity_type, source_config)

 # Known org aliases: {variant_prefix: canonical_prefix}
 # Convention: simplify HF org names (e.g. "deepseek-ai" → "deepseek") to the
 # shorter form used as canonical in this registry.
+#
+# Zhipu/Z.ai cluster: the GLM-family canonical org is `zai` (short form used
+# in this registry for canonical_ids like `zai/glm-4.5`). HF and various
+# leaderboards spell it as `zhipu`, `zhipu-ai`, `z-ai`, or `zai-org` — all
+# refer to the same Beijing AI startup behind GLM.
+#
+# Moonshot AI cluster: canonical org is `moonshotai` (matches HF
+# `moonshotai/Kimi-*` namespace); aliases cover `moonshot` and `moonshot-ai`
+# spellings seen in the corpus.
+#
+# `alibaba` → `qwen` was considered but skipped: the corpus has 1
+# non-Qwen entry (`alibaba__mineru2-pipeline`) which would be wrongly
+# rewritten. Qwen models under `alibaba/` are handled via explicit
+# overrides instead.
 _ORG_ALIASES: dict[str, str] = {
     "deepseek-ai": "deepseek",
     "cohereforai": "cohere",
     "meta-llama": "meta",
     "mistral-ai": "mistralai",
     "nvidia-nemo": "nvidia",
+    # Zhipu/Z.ai → zai
+    "zhipu": "zai",
+    "zhipu-ai": "zai",
+    "z-ai": "zai",
+    "zai-org": "zai",
+    # Moonshot → moonshotai
+    "moonshot": "moonshotai",
+    "moonshot-ai": "moonshotai",
 }
+# Host / gateway / placeholder prefixes that should be DROPPED entirely
+# (not rewritten to a canonical org). These are not model authors —
+# they're hosting platforms, gateways, or placeholders for missing
+# developer fields. When raw_value uses one of these as the org prefix,
+# the resolver tries the bare suffix in addition to the full string.
+#
+# Identified from corpus surveys: alphaxiv leaderboard uses `unknown/`
+# when developer field is absent; Bedrock/Vertex/Azure/Fireworks/etc.
+# are inference platforms re-hosting other companies' models.
+#
+# Entries must be lowercase: `_drop_host_prefix` tests membership with
+# `org.lower()`, so a mixed-case entry here would never match.
+_HOST_PREFIXES_TO_STRIP: set[str] = {
+    "unknown",
+    "bedrock", "amazon-bedrock", "aws-bedrock",
+    "azure", "azure-openai", "azure-cognitive-services",
+    "vertex", "google-vertex", "vertex-anthropic",
+    "fireworks", "fireworks-ai",
+    "groq",
+    "together", "togetherai", "together-ai",
+    "openrouter",
+    "perplexity-agent",
+    "deepinfra", "anyscale", "novita", "novita-ai", "replicate",
+    "ollama", "ollama-cloud",
+    "github-models", "github-copilot",
+    "lambda", "baseten", "modal", "runpod", "cerebras",
+    "sap-ai-core", "cloudflare-ai-gateway", "aihubmix",
+    "kilo", "vercel", "llmgateway", "poe",
+}
+def _drop_duplicated_org_prefix(value: str) -> str | None:
+    """Detect and collapse a repeated-org-prefix typo.
+
+    Recognized shapes (comparison is case-insensitive, but the returned
+    string preserves the original casing of `value` so the downstream
+    lookups can still match exact aliases):
+
+      - ``<org>/<org>-<rest>``           → ``<org>/<rest>``
+      - ``<org>/<org>_<rest>``           → ``<org>/<rest>``
+      - ``<org>/<org>/<rest>``           → ``<org>/<rest>`` (literal double slash)
+      - ``<org>__<org>-<rest>``          → ``<org>__<rest>`` (slug form;
+        the pipeline rewrites ``/`` → ``__`` for route_ids and the resolver
+        may receive either)
+      - ``<org>__<org>_<rest>``          → ``<org>__<rest>``
+      - ``<org>__<org>__<rest>``         → ``<org>__<rest>`` (slug form
+        of the literal double-slash variant)
+
+    Returns ``None`` when `value` is empty, when the org or the remainder
+    is empty, when the org token contains a hyphen, or when none of the
+    shapes above matches.
+
+    The double-separator shapes (``/`` + ``/`` and ``__`` + ``__``) require
+    the second token to equal the org exactly. The ``-`` / ``_`` shapes
+    only require the remainder to START with ``<org>-`` or ``<org>_`` and
+    to have at least one character after that separator.
+
+    To disambiguate the org-typo case (``openai/openai-o1``) from the
+    model-family-prefix case (``gpt-4/gpt-4-turbo``), the heuristic
+    only fires when the leading org token has no internal hyphen.
+    Real org names (``openai``, ``moonshotai``, ``anthropic``) are
+    single tokens; model-family prefixes (``gpt-4``, ``llama-3``,
+    ``claude-opus-4-5``) contain hyphens.
+
+    NOTE(review): this is imperfect in two ways.
+      * A hyphenated org such as ``mistral-ai`` is never collapsed here.
+        `fuzzy_match` runs this collapse BEFORE the org-alias pass and
+        does not re-run it afterwards, so such inputs are not
+        canonicalized before reaching this function — confirm that is
+        acceptable.
+      * A legitimate name whose model part starts with the org name
+        (e.g. ``qwen/qwen-72b``) also matches and yields ``qwen/72b``.
+        `fuzzy_match` only appends the result as an extra candidate, so
+        this matters only if the shortened form matches some alias —
+        confirm against the alias corpus.
+    """
+    if not value:
+        return None
+    # Slash forms first (canonical HF path style).
+    if "/" in value:
+        # Split on the FIRST slash only; any further slashes stay in `rest`.
+        first_slash = value.index("/")
+        org = value[:first_slash]
+        rest = value[first_slash + 1:]
+        if not org or not rest:
+            return None
+        # Skip when the leading token contains a hyphen — likely a
+        # model-family prefix (e.g. `gpt-4/gpt-4-turbo`), not a
+        # duplicated-org typo. NOTE(review): hyphenated orgs like
+        # `mistral-ai` therefore never collapse here; the org-alias pass
+        # in `fuzzy_match` runs after this step, not before.
+        if "-" in org:
+            return None
+        org_lower = org.lower()
+        # `<org>/<org>/<rest>` literal double slash
+        if "/" in rest:
+            second, after = rest.split("/", 1)
+            if second.lower() == org_lower and after:
+                return f"{org}/{after}"
+        # `<org>/<org>-<rest>` and `<org>/<org>_<rest>`
+        for sep in ("-", "_"):
+            prefix = org_lower + sep
+            # The length check rejects a bare `<org>-` with nothing after it.
+            if rest.lower().startswith(prefix) and len(rest) > len(prefix):
+                return f"{org}/{rest[len(prefix):]}"
+    # Slug forms (route_id style with `__`). Also reached when a
+    # slash-containing value matched none of the slash shapes above.
+    if "__" in value:
+        first = value.index("__")
+        org = value[:first]
+        rest = value[first + 2:]
+        if not org or not rest:
+            return None
+        # Same hyphen-in-org guard (see slash branch above).
+        if "-" in org:
+            return None
+        org_lower = org.lower()
+        # `<org>__<org>__<rest>`
+        if "__" in rest:
+            second, after = rest.split("__", 1)
+            if second.lower() == org_lower and after:
+                return f"{org}__{after}"
+        # `<org>__<org>-<rest>` (and `_<rest>` — note we already consumed `__`,
+        # so the next separator is a single `-` or `_`).
+        for sep in ("-", "_"):
+            prefix = org_lower + sep
+            if rest.lower().startswith(prefix) and len(rest) > len(prefix):
+                return f"{org}__{rest[len(prefix):]}"
+    return None
+def _drop_host_prefix(value: str) -> str | None:
+    """Return `value` without its leading hosting-platform prefix, if any.
+
+    The prefix is the text before the first ``/`` or, failing that, before
+    the first ``.``. It is compared case-insensitively against
+    ``_HOST_PREFIXES_TO_STRIP``. When it matches and something follows the
+    separator, everything after that separator is returned unchanged.
+    Returns ``None`` when neither form yields a known host prefix.
+    """
+    # Slash form (`host/model`) is tried first.
+    if "/" in value:
+        org, rest = value.split("/", 1)
+        if org.lower() in _HOST_PREFIXES_TO_STRIP and rest:
+            return rest
+    if "." in value:
+        # Dot form, e.g. "bedrock.anthropic.claude-3-5-sonnet" →
+        # "anthropic.claude-3-5-sonnet". Only strip when everything before
+        # the FIRST dot is a host name; a vendor-dotted id such as
+        # "anthropic.claude-3-5-sonnet" is left alone because "anthropic"
+        # is not in the host set. If the value also contains a slash that
+        # did not match above, the text before the first dot may include
+        # that slash and will then not match any host entry.
+        first_dot = value.index(".")
+        org = value[:first_dot]
+        rest = value[first_dot + 1:]
+        if org.lower() in _HOST_PREFIXES_TO_STRIP and rest:
+            return rest
+    return None
 # Confidence assigned to stem-match results.  Below 1.0 (exact) and 0.95
 # (normalized) so the provenance is clear in the resolution log.
 _STEM_CONFIDENCE = 0.90
     Returns ``(canonical_id, confidence)``; canonical_id is None on no match.
     """
+    # The heuristics below are intentionally model-specific: they strip
+    # hosting prefixes, org aliases, dated model snapshots, and inference-mode
+    # suffixes. Applying them to benchmarks/metrics/harnesses can merge
+    # unrelated entities that merely share a host-like prefix or model-ish tail.
+    if entity_type != "model":
+        return None, 0.0
     candidates_to_try: list[str] = []
     # 1. Suffix stripping (may produce multiple stems: strip one, strip two, etc.)
         if double:
             candidates_to_try.append(double)
+    # 2. Host-prefix dropping — if raw_value's developer prefix is a known
+    # hosting platform / gateway / placeholder, also try the bare suffix.
+    # Apply on the original AND any suffix-stripped forms.
+    for val in [raw_value] + candidates_to_try[:]:
+        bare = _drop_host_prefix(val)
+        if bare:
+            candidates_to_try.append(bare)
+            # The bare form might itself need suffix stripping
+            stripped_bare = _strip_suffix(bare)
+            if stripped_bare:
+                candidates_to_try.append(stripped_bare)
+    # 3. Duplicated-org-prefix collapse — catches typos like
+    # `moonshotai/moonshotai-kimi-k2-instruct` (and the slug-form
+    # `moonshotai__moonshotai-kimi-k2-instruct`). Runs AFTER suffix /
+    # host strip so the deduped form goes through the rest of the
+    # pipeline (org alias + lookup), and BEFORE org alias so the
+    # collapsed string can pick up `_ORG_ALIASES` rewriting on the
+    # next step.
+    for val in [raw_value] + candidates_to_try[:]:
+        deduped = _drop_duplicated_org_prefix(val)
+        if deduped:
+            candidates_to_try.append(deduped)
+    # 4. Org normalization — on original, suffix-stripped, host-stripped,
+    # and duplicate-org-collapsed forms.
     for val in [raw_value] + candidates_to_try[:]:
         rewritten = _normalize_org(val)
         if rewritten:
             candidates_to_try.append(rewritten)
+    # 5. Check each candidate against exact then normalized lookups.
     # Scoped-aware: config-scoped aliases for ``source_config`` count as
     # candidates; unrelated scoped aliases are excluded.
     norm_lookup = alias_store.get_normalized_lookup(entity_type, source_config)

src/eval_card_registry/__pycache__/cli.cpython-314.pyc CHANGED Viewed

Binary files a/src/eval_card_registry/__pycache__/cli.cpython-314.pyc and b/src/eval_card_registry/__pycache__/cli.cpython-314.pyc differ

src/eval_card_registry/__pycache__/main.cpython-314.pyc CHANGED Viewed

Binary files a/src/eval_card_registry/__pycache__/main.cpython-314.pyc and b/src/eval_card_registry/__pycache__/main.cpython-314.pyc differ

src/eval_card_registry/api/__pycache__/routes_entities.cpython-314.pyc CHANGED Viewed

Binary files a/src/eval_card_registry/api/__pycache__/routes_entities.cpython-314.pyc and b/src/eval_card_registry/api/__pycache__/routes_entities.cpython-314.pyc differ

src/eval_card_registry/api/__pycache__/routes_orgs.cpython-314.pyc CHANGED Viewed

Binary files a/src/eval_card_registry/api/__pycache__/routes_orgs.cpython-314.pyc and b/src/eval_card_registry/api/__pycache__/routes_orgs.cpython-314.pyc differ

src/eval_card_registry/api/__pycache__/schemas.cpython-314.pyc CHANGED Viewed

Binary files a/src/eval_card_registry/api/__pycache__/schemas.cpython-314.pyc and b/src/eval_card_registry/api/__pycache__/schemas.cpython-314.pyc differ

src/eval_card_registry/api/routes_entities.py CHANGED Viewed

@@ -52,6 +52,15 @@ def _decode(entity: dict) -> dict:
     return out
 # ------------------------------------------------------------------
 # Models
 # ------------------------------------------------------------------
@@ -63,17 +72,29 @@ def list_models(
     review_status: Optional[ReviewStatus] = None,
     store: RegistryStore = Depends(get_store),
 ):
-    return [_decode(e) for e in queries.list_entities(store, "canonical_models", search=search, review_status=review_status, developer=developer)]
 @router.get("/models/{model_id:path}")
 def get_model(model_id: str, store: RegistryStore = Depends(get_store)):
-    return _decode(_get_or_404(store, "canonical_models", model_id))
 @router.post("/models", status_code=201, dependencies=_writable)
 def create_model(body: ModelCreate, store: RegistryStore = Depends(get_store)):
-    return _decode(queries.upsert_entity(store, "canonical_models", _encode(body.model_dump())))
 @router.patch("/models/{model_id:path}", dependencies=_writable)
@@ -81,7 +102,10 @@ def patch_model(model_id: str, body: ModelPatch, store: RegistryStore = Depends(
     _get_or_404(store, "canonical_models", model_id)
     data = {k: v for k, v in body.model_dump().items() if v is not None}
     data["id"] = model_id
-    return _decode(queries.upsert_entity(store, "canonical_models", _encode(data)))
 # ------------------------------------------------------------------

     return out
+def _decode_model(store: RegistryStore, entity: dict) -> dict:
+    """Decode a model row and fill ``developer`` from its linked org.
+
+    Runs ``_decode`` on `entity`. When the decoded row has a truthy
+    ``org_id`` and the store has a ``canonical_orgs`` table, ``developer``
+    is replaced by that org's ``display_name``. Rows without an ``org_id``,
+    and stores without the orgs table, are returned as ``_decode`` left them.
+    """
+    out = _decode(entity)
+    org_id = out.get("org_id")
+    if org_id and store.has_table("canonical_orgs"):
+        # One org lookup per model. NOTE(review): `list_models` calls this
+        # for every row, so a large listing issues one lookup per model —
+        # confirm that `queries.get_entity` is cheap enough for that.
+        org = queries.get_entity(store, "canonical_orgs", org_id)
+        # NOTE(review): if the org row is missing, any `developer` value
+        # already present on the model is overwritten with None — confirm
+        # this is intended rather than keeping the stored value.
+        out["developer"] = org.get("display_name") if org else None
+    return out
 # ------------------------------------------------------------------
 # Models
 # ------------------------------------------------------------------
     review_status: Optional[ReviewStatus] = None,
     store: RegistryStore = Depends(get_store),
 ):
+    return [
+        _decode_model(store, e)
+        for e in queries.list_entities(
+            store,
+            "canonical_models",
+            search=search,
+            review_status=review_status,
+            developer=developer,
+        )
+    ]
 @router.get("/models/{model_id:path}")
 def get_model(model_id: str, store: RegistryStore = Depends(get_store)):
+    return _decode_model(store, _get_or_404(store, "canonical_models", model_id))
 @router.post("/models", status_code=201, dependencies=_writable)
 def create_model(body: ModelCreate, store: RegistryStore = Depends(get_store)):
+    return _decode_model(
+        store,
+        queries.upsert_entity(store, "canonical_models", _encode(body.model_dump())),
+    )
 @router.patch("/models/{model_id:path}", dependencies=_writable)
     _get_or_404(store, "canonical_models", model_id)
     data = {k: v for k, v in body.model_dump().items() if v is not None}
     data["id"] = model_id
+    return _decode_model(
+        store,
+        queries.upsert_entity(store, "canonical_models", _encode(data)),
+    )
 # ------------------------------------------------------------------

src/eval_card_registry/api/schemas.py CHANGED Viewed

@@ -2,7 +2,7 @@ from typing import Any, Literal, Optional
 from pydantic import BaseModel
-EntityType = Literal["benchmark", "model", "metric", "harness"]
 ReviewStatus = Literal["draft", "reviewed"]
 AliasStatus = Literal["auto", "uncertain", "confirmed", "rejected"]
@@ -22,6 +22,7 @@ class ResolveResponse(BaseModel):
     confidence: float
     created_new: bool
     review_status: Optional[str]
 # --- Entities ---
@@ -30,9 +31,11 @@ class ModelCreate(BaseModel):
     id: str
     display_name: str
     developer: Optional[str] = None
     family: Optional[str] = None
     architecture: Optional[str] = None
     params_billions: Optional[float] = None
     tags: list[str] = []
     metadata: dict[str, Any] = {}
     review_status: str = "draft"
@@ -41,9 +44,11 @@ class ModelCreate(BaseModel):
 class ModelPatch(BaseModel):
     display_name: Optional[str] = None
     developer: Optional[str] = None
     family: Optional[str] = None
     architecture: Optional[str] = None
     params_billions: Optional[float] = None
     tags: Optional[list[str]] = None
     metadata: Optional[dict[str, Any]] = None
     review_status: Optional[str] = None
@@ -108,6 +113,29 @@ class HarnessPatch(BaseModel):
     review_status: Optional[str] = None
 # --- Aliases ---
 class AliasPatch(BaseModel):

 from pydantic import BaseModel
+EntityType = Literal["benchmark", "model", "metric", "harness", "org"]
 ReviewStatus = Literal["draft", "reviewed"]
 AliasStatus = Literal["auto", "uncertain", "confirmed", "rejected"]
     confidence: float
     created_new: bool
     review_status: Optional[str]
+    parent_canonical_id: Optional[str] = None
 # --- Entities ---
     id: str
     display_name: str
     developer: Optional[str] = None
+    org_id: Optional[str] = None
     family: Optional[str] = None
     architecture: Optional[str] = None
     params_billions: Optional[float] = None
+    parent_model_id: Optional[str] = None
     tags: list[str] = []
     metadata: dict[str, Any] = {}
     review_status: str = "draft"
 class ModelPatch(BaseModel):
     display_name: Optional[str] = None
     developer: Optional[str] = None
+    org_id: Optional[str] = None
     family: Optional[str] = None
     architecture: Optional[str] = None
     params_billions: Optional[float] = None
+    parent_model_id: Optional[str] = None
     tags: Optional[list[str]] = None
     metadata: Optional[dict[str, Any]] = None
     review_status: Optional[str] = None
     review_status: Optional[str] = None
+# --- Orgs ---
+# Payload schema for an org with a required `id` and `display_name`; every
+# other field has a default. Presumably the request body for an org-create
+# endpoint, mirroring `ModelCreate` — the org routes are not visible in
+# this diff, TODO confirm. (Kept as `#` comments rather than a docstring so
+# the generated schema description is unchanged.)
+class OrgCreate(BaseModel):
+    id: str
+    display_name: str
+    # Optional link to another org's id — presumably for subsidiaries or
+    # labs under a parent company; verify against the store schema.
+    parent_org_id: Optional[str] = None
+    website: Optional[str] = None
+    hf_org: Optional[str] = None
+    tags: list[str] = []
+    metadata: dict[str, Any] = {}
+    review_status: str = "draft"
+# Partial-update schema for an org: every field is optional and defaults to
+# None. Presumably None means "leave unchanged", as in `patch_model`, which
+# drops None-valued fields before upserting — the org patch route is not
+# visible in this diff, TODO confirm.
+class OrgPatch(BaseModel):
+    display_name: Optional[str] = None
+    parent_org_id: Optional[str] = None
+    website: Optional[str] = None
+    hf_org: Optional[str] = None
+    tags: Optional[list[str]] = None
+    metadata: Optional[dict[str, Any]] = None
+    review_status: Optional[str] = None
 # --- Aliases ---
 class AliasPatch(BaseModel):

src/eval_card_registry/cli.py CHANGED Viewed

@@ -6,14 +6,28 @@ Commands:
   stats     Print registry summary
   sync      Batch sync one or all EEE configs → eval_results table
 """
 from pathlib import Path
 from typing import Optional
 import typer
 import yaml
 from eval_card_registry.store.hf_store import get_store
-from eval_card_registry.store import queries
 from eval_card_registry.store.queries import _is_na
 app = typer.Typer(help="eval-card-registry CLI")
@@ -34,6 +48,11 @@ def _load_store():
 def seed(
     local: bool = typer.Option(False, "--local", help="Write to fixtures/ instead of HF Hub"),
     seed_dir: str = typer.Option("./seed", "--seed-dir"),
 ):
     """Load known canonical entities from seed YAML files."""
     import os
@@ -43,11 +62,171 @@ def seed(
     store = _load_store()
     seed_path = Path(seed_dir)
     # table name, yaml file, label, entity_type (for alias creation)
     seed_specs = [
         ("canonical_benchmarks", seed_path / "benchmarks.yaml", "benchmarks", "benchmark"),
         ("canonical_metrics", seed_path / "metrics.yaml", "metrics", "metric"),
         ("eval_harnesses", seed_path / "harnesses.yaml", "harnesses", "harness"),
     ]
     alias_count = 0
@@ -55,23 +234,44 @@ def seed(
     # Alias key: (raw_value, entity_type, canonical_id, source_config)
     seed_snapshot: list[tuple[str, str, set[str], set[tuple[str, str, str, Optional[str]]]]] = []
     for table, yaml_file, label, entity_type in seed_specs:
-        if not yaml_file.exists():
-            typer.echo(f"  [skip] {yaml_file} not found")
-            continue
-        with open(yaml_file) as f:
-            items = yaml.safe_load(f) or []
         yaml_ids: set[str] = set()
         yaml_alias_keys: set[tuple[str, str, str, Optional[str]]] = set()
-        for item in items:
             # Pop 'aliases' / 'scoped_aliases' before upserting — not table columns.
             extra_aliases = item.pop("aliases", []) or []
             scoped_aliases = item.pop("scoped_aliases", {}) or {}
-            queries.upsert_entity(store, table, item)
-            canonical_id = item["id"]
-            display_name = item.get("display_name", "")
             yaml_ids.add(canonical_id)
             # Global aliases (source_config=None): matched regardless of caller's source_config.
@@ -102,63 +302,129 @@ def seed(
                         "strategy": "seed",
                         "confidence": 1.0,
                         "notes": None,
-                    })
                     alias_count += 1
                 except ValueError:
-                    pass  # alias already exists (e.g. re-seeding)
         seed_snapshot.append((table, entity_type, yaml_ids, yaml_alias_keys))
         typer.echo(f"  {label}: {len(items)}")
-    # Remove seed-originated entities and aliases that are no longer in the YAML.
-    # Only touches rows that were created by seed (strategy == "seed"), never
-    # sync-created aliases or auto-draft entities.
     removed_entities = 0
     removed_aliases = 0
-    for table, entity_type, yaml_ids, yaml_alias_keys in seed_snapshot:
-        # Remove stale seed aliases for this entity type
-        aliases_df = store.table("aliases")
-        seed_mask = (aliases_df["strategy"] == "seed") & (aliases_df["entity_type"] == entity_type)
-        if seed_mask.any():
-            seed_aliases = aliases_df[seed_mask]
-            stale_alias_mask = seed_mask.copy()
-            for idx in seed_aliases.index:
-                row = seed_aliases.loc[idx]
-                sc = row.get("source_config")
-                if _is_na(sc):
-                    sc = None
-                key = (row["raw_value"], row["entity_type"], row["canonical_id"], sc)
-                if key in yaml_alias_keys:
-                    stale_alias_mask[idx] = False
-            n_stale = stale_alias_mask.sum()
-            if n_stale > 0:
-                store.set_table("aliases", aliases_df[~stale_alias_mask].reset_index(drop=True))
-                removed_aliases += int(n_stale)
-        # Remove stale seed entities — only those with review_status "reviewed"
-        # that came from seed and are no longer in the YAML.
-        entity_df = store.table(table)
-        if len(entity_df) > 0:
-            stale = entity_df["id"].isin(yaml_ids)
-            stale_entities = entity_df[~stale & (entity_df["review_status"] == "reviewed")]
-            # Only remove if every alias for this entity is also seed-originated,
-            # meaning it wasn't referenced by sync data.
-            current_aliases = store.table("aliases")
-            for eid in stale_entities["id"]:
-                entity_aliases = current_aliases[
-                    (current_aliases["canonical_id"] == eid)
-                    & (current_aliases["entity_type"] == entity_type)
-                ]
-                if len(entity_aliases) == 0 or (entity_aliases["strategy"] == "seed").all():
-                    entity_df = entity_df[entity_df["id"] != eid]
-                    # Also remove any remaining aliases pointing to it
-                    current_aliases = current_aliases[
-                        ~((current_aliases["canonical_id"] == eid)
-                          & (current_aliases["entity_type"] == entity_type))
                     ]
-                    removed_entities += 1
-            store.set_table(table, entity_df.reset_index(drop=True))
-            store.set_table("aliases", current_aliases.reset_index(drop=True))
     typer.echo(f"  aliases: {alias_count} added, {removed_aliases} removed")
     if removed_entities:

   stats     Print registry summary
   sync      Batch sync one or all EEE configs → eval_results table
 """
+import json
 from pathlib import Path
 from typing import Optional
 import typer
 import yaml
+def _json_encode_if_needed(value):
+    """Encode lists/dicts as JSON strings; pass through anything else.
+    seed/models.yaml uses YAML-native lists for `tags` (e.g. `["open-weight"]`)
+    while seed/benchmarks.yaml stores them pre-encoded as strings (e.g.
+    `'["instruction-following"]'`). The canonical_* parquet columns are all
+    VARCHAR, so we coerce on the way in to keep both formats supported.
+    """
+    if isinstance(value, (list, dict)):
+        return json.dumps(value)
+    return value
 from eval_card_registry.store.hf_store import get_store
+from eval_card_registry.store import queries, schemas
 from eval_card_registry.store.queries import _is_na
 app = typer.Typer(help="eval-card-registry CLI")
 def seed(
     local: bool = typer.Option(False, "--local", help="Write to fixtures/ instead of HF Hub"),
     seed_dir: str = typer.Option("./seed", "--seed-dir"),
+    prune_stale: bool = typer.Option(
+        False,
+        "--prune-stale/--no-prune-stale",
+        help="Remove reviewed seed entities and seed aliases absent from the current YAML snapshot.",
+    ),
 ):
     """Load known canonical entities from seed YAML files."""
     import os
     store = _load_store()
     seed_path = Path(seed_dir)
+    # ------------------------------------------------------------------
+    # Models — three-layer load from seed/models/:
+    #   sources/*.generated.yaml  → external catalog data (e.g. models.dev),
+    #                               flat lists, never hand-edited
+    #   core.yaml                 → curated canonicals (the source of truth),
+    #                               flat list OR {skip_ids, entries} dict
+    #   enrichments/aliases.yaml  → optional alias-only entries ({id, aliases})
+    #                               that union onto whatever exists
+    #
+    # Merge order: sources → core → enrichments. Field-level merge per entry
+    # (aliases / tags UNION; other scalars prefer non-empty, last-write-wins).
+    # `skip_ids` from core drops an id from every layer (core included);
+    # `skip_source_ids` drops it from sources and enrichments only.
+    # ------------------------------------------------------------------
+    def _load_models_merged() -> list[dict]:
+        models_dir = seed_path / "models"
+        sources_dir = models_dir / "sources"
+        core_file = models_dir / "core.yaml"
+        enrichments_file = models_dir / "enrichments" / "aliases.yaml"
+        source_entries: list[dict] = []
+        core_entries: list[dict] = []
+        enrichment_entries: list[dict] = []
+        skip_ids: set[str] = set()
+        if sources_dir.is_dir():
+            for src_path in sorted(sources_dir.glob("*.generated.yaml")):
+                with open(src_path) as f:
+                    loaded = yaml.safe_load(f) or []
+                if not isinstance(loaded, list):
+                    raise typer.BadParameter(f"{src_path} must be a flat list")
+                source_entries.extend(loaded)
+        skip_source_ids: set[str] = set()
+        if core_file.exists():
+            with open(core_file) as f:
+                loaded = yaml.safe_load(f) or {}
+            if isinstance(loaded, list):
+                core_entries = loaded
+            elif isinstance(loaded, dict):
+                core_entries = loaded.get("entries", []) or []
+                skip_ids = set(loaded.get("skip_ids", []) or [])
+                # `skip_source_ids` drops these ids from sources/enrichments only,
+                # leaving core entries authoritative. Used when models.dev (or any
+                # auto-generated source) ships bad aliases for a model that core.yaml
+                # curates correctly — otherwise the loader's UNION-merge would
+                # re-introduce the bad aliases on every refresh.
+                skip_source_ids = set(loaded.get("skip_source_ids", []) or [])
+            else:
+                raise typer.BadParameter(f"{core_file} unexpected shape {type(loaded)}")
+        if enrichments_file.exists():
+            with open(enrichments_file) as f:
+                loaded = yaml.safe_load(f) or []
+            if not isinstance(loaded, list):
+                raise typer.BadParameter(f"{enrichments_file} must be a flat list")
+            enrichment_entries = loaded
+        def _merge_into(target: dict, src: dict) -> dict:
+            """Merge two entries with the same canonical_id.
+            Field-level merge policy:
+            - `aliases`: UNION (case-insensitive dedup).
+            - `tags`: UNION (case-insensitive dedup). Both YAML-list and
+              JSON-encoded-string forms supported. Protects against session
+              additions overwriting `[open-weight, moe]` with `[open-weight]`.
+            - Other scalars: prefer non-empty across the pair; when both
+              sides have a non-empty value, last-write-wins. Protects against
+              session-batch entries that omit `architecture` /
+              `params_billions` from silently overwriting earlier rich entries.
+            "Empty" means: None, "", [], {}, or default-looking '{}' / '[]'.
+            """
+            import json as _json
+            existing_aliases = list(target.get("aliases") or [])
+            existing_lc = {a.lower() for a in existing_aliases if a}
+            new_aliases = list(src.get("aliases") or [])
+            for a in new_aliases:
+                if a and a.lower() not in existing_lc:
+                    existing_aliases.append(a)
+                    existing_lc.add(a.lower())
+            def _decode_list_field(v):
+                """tags / metadata may be either YAML-list or JSON-encoded
+                string. Return a list (best-effort) and a boolean indicating
+                whether to re-encode on write."""
+                if v is None:
+                    return [], False
+                if isinstance(v, list):
+                    return list(v), False
+                if isinstance(v, str):
+                    s = v.strip()
+                    if not s or s in ("[]", "null"):
+                        return [], True
+                    try:
+                        d = _json.loads(s)
+                        if isinstance(d, list):
+                            return list(d), True
+                    except (ValueError, TypeError):
+                        pass
+                return [v], False
+            # Union tags (handles both list and JSON-string formats)
+            tgt_tags, tgt_was_json = _decode_list_field(target.get("tags"))
+            src_tags, src_was_json = _decode_list_field(src.get("tags"))
+            seen_tags_lc = {str(t).lower() for t in tgt_tags}
+            for t in src_tags:
+                if t is not None and str(t).lower() not in seen_tags_lc:
+                    tgt_tags.append(t)
+                    seen_tags_lc.add(str(t).lower())
+            # Re-encode if either source was a JSON string (the parquet column
+            # is VARCHAR; _json_encode_if_needed downstream handles either).
+            tags_merged = _json.dumps(tgt_tags) if (tgt_was_json or src_was_json) else tgt_tags
+            def _is_empty(v) -> bool:
+                if v is None:
+                    return True
+                if isinstance(v, (list, dict)) and len(v) == 0:
+                    return True
+                if isinstance(v, str) and v.strip() in ("", "[]", "{}"):
+                    return True
+                return False
+            merged = dict(target)
+            for k, v in src.items():
+                if k in ("aliases", "tags"):
+                    continue  # handled separately
+                if _is_empty(v):
+                    continue
+                merged[k] = v
+            merged["aliases"] = existing_aliases
+            merged["tags"] = tags_merged
+            return merged
+        by_id: dict[str, dict] = {}
+        def _absorb(entries: list[dict], extra_skip: set[str] = frozenset()) -> None:
+            drop = skip_ids | extra_skip
+            for e in entries:
+                if "id" not in e:
+                    raise typer.BadParameter(f"models seed entry missing id: {e!r}")
+                if e["id"] in drop:
+                    continue
+                if e["id"] in by_id:
+                    by_id[e["id"]] = _merge_into(by_id[e["id"]], e)
+                else:
+                    by_id[e["id"]] = e
+        # Sources/enrichments respect both skip_ids and skip_source_ids;
+        # core entries respect only skip_ids so curated overrides always apply.
+        _absorb(source_entries, extra_skip=skip_source_ids)
+        _absorb(core_entries)
+        _absorb(enrichment_entries, extra_skip=skip_source_ids)
+        return list(by_id.values())
     # table name, yaml file, label, entity_type (for alias creation)
     seed_specs = [
+        ("canonical_orgs", seed_path / "orgs.yaml", "orgs", "org"),
         ("canonical_benchmarks", seed_path / "benchmarks.yaml", "benchmarks", "benchmark"),
         ("canonical_metrics", seed_path / "metrics.yaml", "metrics", "metric"),
         ("eval_harnesses", seed_path / "harnesses.yaml", "harnesses", "harness"),
+        # Models: load via the merge helper; pass a sentinel path that
+        # signals the loop below to invoke _load_models_merged() instead of
+        # reading a single YAML file.
+        ("canonical_models", "__merged_models__", "models", "model"),
     ]
     alias_count = 0
     # Alias key: (raw_value, entity_type, canonical_id, source_config)
     seed_snapshot: list[tuple[str, str, set[str], set[tuple[str, str, str, Optional[str]]]]] = []
+    # Build the alias index once so add_alias collision checks are O(1) instead
+    # of O(N) DataFrame mask scans. Combined with buffered=True below, this
+    # avoids the O(N²) pd.concat-per-row cost on ~1k entities + ~13k aliases.
+    queries._rebuild_alias_index(store)
     for table, yaml_file, label, entity_type in seed_specs:
+        table_columns = set(schemas.empty(table).columns)
+        if yaml_file == "__merged_models__":
+            items = _load_models_merged()
+            if not items:
+                typer.echo(f"  [skip] no model entries found in seed/models.yaml or _overrides/")
+                continue
+        else:
+            if not yaml_file.exists():
+                typer.echo(f"  [skip] {yaml_file} not found")
+                continue
+            with open(yaml_file) as f:
+                items = yaml.safe_load(f) or []
         yaml_ids: set[str] = set()
         yaml_alias_keys: set[tuple[str, str, str, Optional[str]]] = set()
+        for original_item in items:
+            item = dict(original_item)
             # Pop 'aliases' / 'scoped_aliases' before upserting — not table columns.
             extra_aliases = item.pop("aliases", []) or []
             scoped_aliases = item.pop("scoped_aliases", {}) or {}
+            # Normalize tags / metadata: YAML may have native lists/dicts, but
+            # the canonical_* parquet columns are VARCHAR, so encode if needed.
+            for col in ("tags", "metadata"):
+                if col in item:
+                    item[col] = _json_encode_if_needed(item[col])
+            entity_item = {k: v for k, v in item.items() if k in table_columns}
+            if "id" not in entity_item:
+                raise typer.BadParameter(f"{label} seed entry is missing required id: {original_item!r}")
+            queries.upsert_entity(store, table, entity_item, buffered=True)
+            canonical_id = entity_item["id"]
+            display_name = entity_item.get("display_name", "")
             yaml_ids.add(canonical_id)
             # Global aliases (source_config=None): matched regardless of caller's source_config.
                         "strategy": "seed",
                         "confidence": 1.0,
                         "notes": None,
+                    }, buffered=True)
                     alias_count += 1
                 except ValueError:
+                    # add_alias raises on uniqueness collision: an alias row
+                    # already exists for (entity_type, raw_value, source_config).
+                    # YAML is the source of truth, so if the existing row points
+                    # at a different canonical_id, this is a YAML rename and we
+                    # must REPOINT the existing row — NOT silently swallow it.
+                    # Without this, stale-removal at the end of seed would then
+                    # delete the row (its old key is no longer in
+                    # yaml_alias_keys), causing total alias loss.
+                    aliases_df = store.table("aliases")
+                    mask = (
+                        (aliases_df["raw_value"] == raw_value)
+                        & (aliases_df["entity_type"] == entity_type)
+                        & (aliases_df["status"] != "rejected")
+                    )
+                    if source_cfg is not None:
+                        mask = mask & (aliases_df["source_config"] == source_cfg)
+                    else:
+                        mask = mask & aliases_df["source_config"].isna()
+                    existing = aliases_df[mask]
+                    if existing.empty:
+                        # Collision came from the pending buffer (this run added
+                        # the same key earlier). For same-canonical re-adds this
+                        # is a no-op; for different-canonical we must mutate the
+                        # pending dict in place so the rename isn't lost on
+                        # flush. _alias_index points at the same dict, so
+                        # updating it here keeps the index consistent.
+                        for p in queries._get_pending(store, "aliases"):
+                            if (p.get("entity_type") == entity_type
+                                    and p.get("raw_value") == raw_value
+                                    and queries._source_config_key(p.get("source_config")) == queries._source_config_key(source_cfg)
+                                    and p.get("status") != "rejected"):
+                                if p["canonical_id"] != canonical_id:
+                                    prev = p["canonical_id"]
+                                    p["canonical_id"] = canonical_id
+                                    p["source_field"] = "seed"
+                                    p["status"] = "confirmed"
+                                    p["strategy"] = "seed"
+                                    p["confidence"] = 1.0
+                                    typer.echo(
+                                        f"  [rename] alias {raw_value!r} ({entity_type}) "
+                                        f"moved {prev!r} -> {canonical_id!r} (pending)"
+                                    )
+                                    alias_count += 1
+                                break
+                        continue
+                    row = existing.iloc[0]
+                    if row["canonical_id"] != canonical_id:
+                        # Rename: repoint the existing row at the new canonical.
+                        queries.update_alias(store, row["id"], {
+                            "canonical_id": canonical_id,
+                            "source_field": "seed",
+                            "status": "confirmed",
+                            "strategy": "seed",
+                            "confidence": 1.0,
+                        })
+                        typer.echo(
+                            f"  [rename] alias {raw_value!r} ({entity_type}) "
+                            f"moved {row['canonical_id']!r} -> {canonical_id!r}"
+                        )
+                        alias_count += 1
+                    # else: identical re-seed of an existing alias — no-op.
         seed_snapshot.append((table, entity_type, yaml_ids, yaml_alias_keys))
         typer.echo(f"  {label}: {len(items)}")
+    # Flush all buffered upserts (entities + aliases) into their tables in a
+    # single pd.concat per table. prune_stale below reads store.table(...)
+    # directly, so this must happen before that block.
+    queries.flush_pending(store)
     removed_entities = 0
     removed_aliases = 0
+    if prune_stale:
+        # Remove seed-originated entities and aliases that are no longer in the YAML.
+        # Only touches rows that were created by seed (strategy == "seed"), never
+        # sync-created aliases or auto-draft entities.
+        for table, entity_type, yaml_ids, yaml_alias_keys in seed_snapshot:
+            # Remove stale seed aliases for this entity type.
+            aliases_df = store.table("aliases")
+            seed_mask = (aliases_df["strategy"] == "seed") & (aliases_df["entity_type"] == entity_type)
+            if seed_mask.any():
+                seed_aliases = aliases_df[seed_mask]
+                stale_alias_mask = seed_mask.copy()
+                for idx in seed_aliases.index:
+                    row = seed_aliases.loc[idx]
+                    sc = row.get("source_config")
+                    if _is_na(sc):
+                        sc = None
+                    key = (row["raw_value"], row["entity_type"], row["canonical_id"], sc)
+                    if key in yaml_alias_keys:
+                        stale_alias_mask[idx] = False
+                n_stale = stale_alias_mask.sum()
+                if n_stale > 0:
+                    store.set_table("aliases", aliases_df[~stale_alias_mask].reset_index(drop=True))
+                    removed_aliases += int(n_stale)
+            # Remove stale seed entities — only those with review_status "reviewed"
+            # that came from seed and are no longer in the YAML.
+            entity_df = store.table(table)
+            if len(entity_df) > 0:
+                stale = entity_df["id"].isin(yaml_ids)
+                stale_entities = entity_df[~stale & (entity_df["review_status"] == "reviewed")]
+                # Only remove if every alias for this entity is also seed-originated,
+                # meaning it wasn't referenced by sync data.
+                current_aliases = store.table("aliases")
+                for eid in stale_entities["id"]:
+                    entity_aliases = current_aliases[
+                        (current_aliases["canonical_id"] == eid)
+                        & (current_aliases["entity_type"] == entity_type)
                     ]
+                    if len(entity_aliases) == 0 or (entity_aliases["strategy"] == "seed").all():
+                        entity_df = entity_df[entity_df["id"] != eid]
+                        # Also remove any remaining aliases pointing to it.
+                        current_aliases = current_aliases[
+                            ~((current_aliases["canonical_id"] == eid)
+                              & (current_aliases["entity_type"] == entity_type))
+                        ]
+                        removed_entities += 1
+                store.set_table(table, entity_df.reset_index(drop=True))
+                store.set_table("aliases", current_aliases.reset_index(drop=True))
     typer.echo(f"  aliases: {alias_count} added, {removed_aliases} removed")
     if removed_entities:

src/eval_card_registry/main.py CHANGED Viewed

@@ -9,6 +9,7 @@ from eval_card_registry.services.log_writer import ResolveLogWriter
 from eval_card_registry.api.routes_resolve import router as resolve_router
 from eval_card_registry.api.routes_entities import router as entities_router
 from eval_card_registry.api.routes_aliases import router as aliases_router
 from eval_card_registry.api.routes_health import router as health_router
@@ -44,5 +45,6 @@ PREFIX = "/api/v1"
 app.include_router(resolve_router, prefix=PREFIX)
 app.include_router(entities_router, prefix=PREFIX)
 app.include_router(aliases_router, prefix=PREFIX)
 app.include_router(health_router, prefix=PREFIX)

 from eval_card_registry.api.routes_resolve import router as resolve_router
 from eval_card_registry.api.routes_entities import router as entities_router
 from eval_card_registry.api.routes_aliases import router as aliases_router
+from eval_card_registry.api.routes_orgs import router as orgs_router
 from eval_card_registry.api.routes_health import router as health_router
 app.include_router(resolve_router, prefix=PREFIX)
 app.include_router(entities_router, prefix=PREFIX)
+app.include_router(orgs_router, prefix=PREFIX)
 app.include_router(aliases_router, prefix=PREFIX)
 app.include_router(health_router, prefix=PREFIX)

src/eval_card_registry/services/__pycache__/resolution_service.cpython-314.pyc CHANGED Viewed

Binary files a/src/eval_card_registry/services/__pycache__/resolution_service.cpython-314.pyc and b/src/eval_card_registry/services/__pycache__/resolution_service.cpython-314.pyc differ

src/eval_card_registry/services/resolution_service.py CHANGED Viewed

@@ -28,6 +28,13 @@ _ENTITY_TABLE = {
     "benchmark": "canonical_benchmarks",
     "metric": "canonical_metrics",
     "harness": "eval_harnesses",
 }
@@ -62,6 +69,7 @@ def _no_match_result() -> dict:
         "confidence": 0.0,
         "created_new": False,
         "review_status": None,
     }
@@ -71,6 +79,7 @@ def _match_result(
     confidence: float,
     review_status: Optional[str],
     created_new: bool = False,
 ) -> dict:
     return {
         "canonical_id": canonical_id,
@@ -78,17 +87,30 @@ def _match_result(
         "confidence": confidence,
         "created_new": created_new,
         "review_status": review_status,
     }
 class ResolutionService:
     def __init__(self, registry_store: RegistryStore) -> None:
         self.store = registry_store
         self._resolver: Optional[Resolver] = None
-        # Cache: (raw_value, entity_type) → resolve result dict.
         # Avoids re-running the full strategy chain for duplicate strings
         # (e.g. "Accuracy" appears in every record).
-        self._resolve_cache: dict[tuple[str, str], dict] = {}
     def _get_resolver(self) -> Resolver:
         if self._resolver is None:
@@ -133,6 +155,7 @@ class ResolutionService:
                     result.strategy,
                     result.confidence,
                     entity.get("review_status") if entity else None,
                 )
             else:
                 result_dict = _no_match_result()
@@ -149,6 +172,7 @@ class ResolutionService:
                     existing["strategy"],
                     existing["confidence"],
                     entity.get("review_status") if entity else None,
                 )
                 self._resolve_cache[cache_key] = result_dict
                 return result_dict
@@ -236,6 +260,7 @@ class ResolutionService:
             result.confidence,
             entity.get("review_status") if entity else "draft",
             created_new=created_new,
         )
         self._resolve_cache[cache_key] = result_dict
         return result_dict
@@ -258,17 +283,36 @@ class ResolutionService:
             "updated_at": now,
         }
         if entity_type == "model":
-            base.update({"developer": None, "family": None, "architecture": None, "params_billions": None, "tags": "[]"})
         elif entity_type == "benchmark":
             base.update({"description": None, "dataset_repo": None, "parent_benchmark_id": None, "tags": "[]"})
         elif entity_type == "metric":
             base.update({"score_type": None, "lower_is_better": False, "min_score": None, "max_score": None})
         elif entity_type == "harness":
             base.update({"version": None, "fork_url": None})
         queries.upsert_entity(self.store, table, base, buffered=True)
         return candidate_id
     def _find_alias_id(
         self,
         raw_value: str,

     "benchmark": "canonical_benchmarks",
     "metric": "canonical_metrics",
     "harness": "eval_harnesses",
+    "org": "canonical_orgs",
+}
+_PARENT_FIELD = {
+    "model": "parent_model_id",
+    "benchmark": "parent_benchmark_id",
+    "org": "parent_org_id",
 }
         "confidence": 0.0,
         "created_new": False,
         "review_status": None,
+        "parent_canonical_id": None,
     }
     confidence: float,
     review_status: Optional[str],
     created_new: bool = False,
+    parent_canonical_id: Optional[str] = None,
 ) -> dict:
     return {
         "canonical_id": canonical_id,
         "confidence": confidence,
         "created_new": created_new,
         "review_status": review_status,
+        "parent_canonical_id": parent_canonical_id,
     }
+def _parent_canonical_id(entity_type: str, entity: Optional[dict]) -> Optional[str]:
+    if not entity:
+        return None
+    field = _PARENT_FIELD.get(entity_type)
+    if not field:
+        return None
+    value = entity.get(field)
+    if queries._is_na(value):
+        return None
+    return value or None
 class ResolutionService:
     def __init__(self, registry_store: RegistryStore) -> None:
         self.store = registry_store
         self._resolver: Optional[Resolver] = None
+        # Cache: (raw_value, entity_type, source_config) → resolve result dict.
         # Avoids re-running the full strategy chain for duplicate strings
         # (e.g. "Accuracy" appears in every record).
+        self._resolve_cache: dict[tuple[str, str, Optional[str]], dict] = {}
     def _get_resolver(self) -> Resolver:
         if self._resolver is None:
                     result.strategy,
                     result.confidence,
                     entity.get("review_status") if entity else None,
+                    parent_canonical_id=_parent_canonical_id(entity_type, entity),
                 )
             else:
                 result_dict = _no_match_result()
                     existing["strategy"],
                     existing["confidence"],
                     entity.get("review_status") if entity else None,
+                    parent_canonical_id=_parent_canonical_id(entity_type, entity),
                 )
                 self._resolve_cache[cache_key] = result_dict
                 return result_dict
             result.confidence,
             entity.get("review_status") if entity else "draft",
             created_new=created_new,
+            parent_canonical_id=_parent_canonical_id(entity_type, entity),
         )
         self._resolve_cache[cache_key] = result_dict
         return result_dict
             "updated_at": now,
         }
         if entity_type == "model":
+            base.update({
+                "developer": None,
+                "org_id": self._resolve_model_org_id(raw_value),
+                "family": None,
+                "architecture": None,
+                "params_billions": None,
+                "parent_model_id": None,
+                "tags": "[]",
+            })
         elif entity_type == "benchmark":
             base.update({"description": None, "dataset_repo": None, "parent_benchmark_id": None, "tags": "[]"})
         elif entity_type == "metric":
             base.update({"score_type": None, "lower_is_better": False, "min_score": None, "max_score": None})
         elif entity_type == "harness":
             base.update({"version": None, "fork_url": None})
+        elif entity_type == "org":
+            base.update({"parent_org_id": None, "website": None, "hf_org": None, "tags": "[]"})
         queries.upsert_entity(self.store, table, base, buffered=True)
         return candidate_id
+    def _resolve_model_org_id(self, raw_value: str) -> Optional[str]:
+        if "/" not in raw_value:
+            return None
+        raw_org = raw_value.split("/", 1)[0].strip()
+        if not raw_org:
+            return None
+        result = self._get_resolver().resolve(raw_org, "org", None)
+        return result.canonical_id
     def _find_alias_id(
         self,
         raw_value: str,

src/eval_card_registry/store/__pycache__/hf_store.cpython-314.pyc CHANGED Viewed

Binary files a/src/eval_card_registry/store/__pycache__/hf_store.cpython-314.pyc and b/src/eval_card_registry/store/__pycache__/hf_store.cpython-314.pyc differ

src/eval_card_registry/store/__pycache__/queries.cpython-314.pyc CHANGED Viewed

Binary files a/src/eval_card_registry/store/__pycache__/queries.cpython-314.pyc and b/src/eval_card_registry/store/__pycache__/queries.cpython-314.pyc differ

src/eval_card_registry/store/__pycache__/schemas.cpython-314.pyc CHANGED Viewed

Binary files a/src/eval_card_registry/store/__pycache__/schemas.cpython-314.pyc and b/src/eval_card_registry/store/__pycache__/schemas.cpython-314.pyc differ

src/eval_card_registry/store/hf_store.py CHANGED Viewed

@@ -28,6 +28,7 @@ def _fixtures_path() -> Path:
 TABLE_NAMES = [
     "canonical_models",
     "canonical_benchmarks",
     "canonical_metrics",
@@ -40,6 +41,7 @@ TABLE_NAMES = [
 # Tables needed for query-only (read-only) mode
 QUERY_TABLE_NAMES = [
     "canonical_models",
     "canonical_benchmarks",
     "canonical_metrics",

 TABLE_NAMES = [
+    "canonical_orgs",
     "canonical_models",
     "canonical_benchmarks",
     "canonical_metrics",
 # Tables needed for query-only (read-only) mode
 QUERY_TABLE_NAMES = [
+    "canonical_orgs",
     "canonical_models",
     "canonical_benchmarks",
     "canonical_metrics",

src/eval_card_registry/store/queries.py CHANGED Viewed

@@ -32,6 +32,11 @@ def _is_na(value) -> bool:
         return False
 def _row_to_dict(row: pd.Series) -> dict:
     """Convert a Series to dict, coercing pandas NA/NaN/NaT to None for JSON.
     Uses Series.to_dict() so numpy scalars are unboxed to Python types."""
@@ -154,13 +159,22 @@ def _rebuild_alias_index(store: RegistryStore) -> None:
     _alias_index = {}
     df = store.table("aliases")
     for _, row in df.iterrows():
-        key = (row["entity_type"], row["raw_value"], row.get("source_config"))
         if row.get("status") != "rejected":
-            _alias_index[key] = row.to_dict()
     # Also index pending aliases
     for pending_row in _get_pending(store, "aliases"):
         if pending_row.get("status") != "rejected":
-            key = (pending_row["entity_type"], pending_row["raw_value"], pending_row.get("source_config"))
             _alias_index[key] = pending_row
@@ -170,6 +184,7 @@ def get_alias(
     entity_type: str,
     source_config: Optional[str],
 ) -> Optional[dict]:
     # Fast path: use index if available
     if _alias_index:
         if source_config:
@@ -208,7 +223,7 @@ def add_alias(store: RegistryStore, data: dict, buffered: bool = False) -> dict:
     """
     raw_value = data["raw_value"]
     entity_type = data["entity_type"]
-    source_config = data.get("source_config")
     key = (entity_type, raw_value, source_config)
     # Check uniqueness via index if available
@@ -238,14 +253,21 @@ def add_alias(store: RegistryStore, data: dict, buffered: bool = False) -> dict:
     # Check pending buffer
     for p in _get_pending(store, "aliases"):
         if (p["entity_type"] == entity_type and p["raw_value"] == raw_value
-                and p.get("source_config") == source_config and p.get("status") != "rejected"):
             raise ValueError(
                 f"Alias already exists for ({entity_type!r}, {raw_value!r}, source_config={source_config!r}). "
                 "Use update_alias() to modify an existing alias."
             )
     now = _now()
-    row = {**data, "id": str(uuid.uuid4()), "created_at": now, "updated_at": now}
     if buffered:
         _get_pending(store, "aliases").append(row)
@@ -254,8 +276,9 @@ def add_alias(store: RegistryStore, data: dict, buffered: bool = False) -> dict:
         df = pd.concat([df, pd.DataFrame([row])], ignore_index=True)
         store.set_table("aliases", df)
-    # Update index (only non-rejected aliases block future inserts)
-    if row.get("status") != "rejected":
         _alias_index[key] = row
     return row
@@ -269,7 +292,20 @@ def update_alias(store: RegistryStore, alias_id: str, updates: dict) -> Optional
             df.loc[df["id"] == alias_id, col] = val
     df.loc[df["id"] == alias_id, "updated_at"] = _now()
     store.set_table("aliases", df)
-    return _row_to_dict(df[df["id"] == alias_id].iloc[0])
 # ------------------------------------------------------------------

         return False
+def _source_config_key(value) -> Optional[str]:
+    """Normalize nullable source_config values for alias-index keys.
+
+    Returns None when ``_is_na(value)`` is true and ``value`` unchanged
+    otherwise, so every missing-value representation ends up as the same
+    None component of an ``(entity_type, raw_value, source_config)`` key.
+    NOTE(review): ``_is_na`` presumably covers pandas NA/NaN/NaT -- confirm.
+    """
+    return None if _is_na(value) else value
 def _row_to_dict(row: pd.Series) -> dict:
     """Convert a Series to dict, coercing pandas NA/NaN/NaT to None for JSON.
     Uses Series.to_dict() so numpy scalars are unboxed to Python types."""
     _alias_index = {}
     df = store.table("aliases")
     for _, row in df.iterrows():
         if row.get("status") != "rejected":
+            row_dict = _row_to_dict(row)
+            key = (
+                row_dict["entity_type"],
+                row_dict["raw_value"],
+                _source_config_key(row_dict.get("source_config")),
+            )
+            _alias_index[key] = row_dict
     # Also index pending aliases
     for pending_row in _get_pending(store, "aliases"):
         if pending_row.get("status") != "rejected":
+            key = (
+                pending_row["entity_type"],
+                pending_row["raw_value"],
+                _source_config_key(pending_row.get("source_config")),
+            )
             _alias_index[key] = pending_row
     entity_type: str,
     source_config: Optional[str],
 ) -> Optional[dict]:
+    source_config = _source_config_key(source_config)
     # Fast path: use index if available
     if _alias_index:
         if source_config:
     """
     raw_value = data["raw_value"]
     entity_type = data["entity_type"]
+    source_config = _source_config_key(data.get("source_config"))
     key = (entity_type, raw_value, source_config)
     # Check uniqueness via index if available
     # Check pending buffer
     for p in _get_pending(store, "aliases"):
         if (p["entity_type"] == entity_type and p["raw_value"] == raw_value
+                and _source_config_key(p.get("source_config")) == source_config
+                and p.get("status") != "rejected"):
             raise ValueError(
                 f"Alias already exists for ({entity_type!r}, {raw_value!r}, source_config={source_config!r}). "
                 "Use update_alias() to modify an existing alias."
             )
     now = _now()
+    row = {
+        **data,
+        "source_config": source_config,
+        "id": str(uuid.uuid4()),
+        "created_at": now,
+        "updated_at": now,
+    }
     if buffered:
         _get_pending(store, "aliases").append(row)
         df = pd.concat([df, pd.DataFrame([row])], ignore_index=True)
         store.set_table("aliases", df)
+    # Update index only if it has already been built. If it is empty, get_alias
+    # should keep using the DataFrame/pending slow path instead of a partial index.
+    if _alias_index and row.get("status") != "rejected":
         _alias_index[key] = row
     return row
             df.loc[df["id"] == alias_id, col] = val
     df.loc[df["id"] == alias_id, "updated_at"] = _now()
     store.set_table("aliases", df)
+    updated = _row_to_dict(df[df["id"] == alias_id].iloc[0])
+    # Keep the in-memory index in sync if it was built — otherwise a follow-up
+    # add_alias() / get_alias() would see stale canonical data for this key.
+    if _alias_index:
+        key = (
+            updated["entity_type"],
+            updated["raw_value"],
+            _source_config_key(updated.get("source_config")),
+        )
+        if updated.get("status") != "rejected":
+            _alias_index[key] = updated
+        else:
+            _alias_index.pop(key, None)
+    return updated
 # ------------------------------------------------------------------

src/eval_card_registry/store/schemas.py CHANGED Viewed

@@ -3,13 +3,27 @@ import pandas as pd
 _SCHEMAS: dict[str, dict] = {
     "canonical_models": {
         "id": pd.StringDtype(),
         "display_name": pd.StringDtype(),
         "developer": pd.StringDtype(),
         "family": pd.StringDtype(),
         "architecture": pd.StringDtype(),
         "params_billions": "float64",
         "tags": pd.StringDtype(),     # JSON-encoded list
         "metadata": pd.StringDtype(), # JSON-encoded dict
         "review_status": pd.StringDtype(),

 _SCHEMAS: dict[str, dict] = {
+    "canonical_orgs": {
+        "id": pd.StringDtype(),
+        "display_name": pd.StringDtype(),
+        "parent_org_id": pd.StringDtype(),
+        "website": pd.StringDtype(),
+        "hf_org": pd.StringDtype(),
+        "tags": pd.StringDtype(),     # JSON-encoded list
+        "metadata": pd.StringDtype(), # JSON-encoded dict
+        "review_status": pd.StringDtype(),
+        "created_at": pd.StringDtype(),
+        "updated_at": pd.StringDtype(),
+    },
     "canonical_models": {
         "id": pd.StringDtype(),
         "display_name": pd.StringDtype(),
         "developer": pd.StringDtype(),
+        "org_id": pd.StringDtype(),
         "family": pd.StringDtype(),
         "architecture": pd.StringDtype(),
         "params_billions": "float64",
+        "parent_model_id": pd.StringDtype(),
         "tags": pd.StringDtype(),     # JSON-encoded list
         "metadata": pd.StringDtype(), # JSON-encoded dict
         "review_status": pd.StringDtype(),