Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files
packages/eval-entity-resolver/src/eval_entity_resolver/strategies/fuzzy.py
CHANGED
|
@@ -108,9 +108,15 @@ _STRIP_SUFFIX_PATTERNS: list[re.Pattern[str]] = [
|
|
| 108 |
# exist; only when they don't does this strip's drop-thinking behavior
|
| 109 |
# take over.
|
| 110 |
re.compile(r"-thinking-\d+k$", re.IGNORECASE),
|
| 111 |
-
#
|
| 112 |
-
#
|
| 113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
]
|
| 115 |
|
| 116 |
# Strip just the `-Nk` budget tail, leaving `-thinking` intact. Used by
|
|
@@ -382,18 +388,28 @@ _ISO_DATE_YEAR_RE = re.compile(r"^(.+)-(\d{4})$")
|
|
| 382 |
|
| 383 |
|
| 384 |
def _strip_openai_iso_date(value: str) -> list[str]:
|
| 385 |
-
"""For OpenAI-shaped values ending in an ISO-format date, return
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 397 |
"""
|
| 398 |
if not _is_openai_shaped(value):
|
| 399 |
return []
|
|
@@ -415,7 +431,6 @@ def _strip_openai_iso_date(value: str) -> list[str]:
|
|
| 415 |
if _is_release_year(y) and 1 <= int(mo) <= 12 and 1 <= int(d) <= 31:
|
| 416 |
candidates.append(f"{prefix}-{y}-{mo}")
|
| 417 |
candidates.append(f"{prefix}-{y}")
|
| 418 |
-
candidates.append(prefix)
|
| 419 |
return candidates
|
| 420 |
|
| 421 |
m = _ISO_DATE_MONTH_RE.match(value)
|
|
@@ -423,15 +438,11 @@ def _strip_openai_iso_date(value: str) -> list[str]:
|
|
| 423 |
prefix, y, mo = m.groups()
|
| 424 |
if _is_release_year(y) and 1 <= int(mo) <= 12:
|
| 425 |
candidates.append(f"{prefix}-{y}")
|
| 426 |
-
candidates.append(prefix)
|
| 427 |
return candidates
|
| 428 |
|
| 429 |
-
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
if _is_release_year(y):
|
| 433 |
-
candidates.append(prefix)
|
| 434 |
-
|
| 435 |
return candidates
|
| 436 |
|
| 437 |
|
|
|
|
| 108 |
# exist; only when they don't does this strip's drop-thinking behavior
|
| 109 |
# take over.
|
| 110 |
re.compile(r"-thinking-\d+k$", re.IGNORECASE),
|
| 111 |
+
# NB: trailing 8-digit date suffix (`-20251101`) is NOT stripped here.
|
| 112 |
+
# Stripping a packed YYYYMMDD ALWAYS produces the bare-family form,
|
| 113 |
+
# which silently aliases dated snapshots into their family pointer
|
| 114 |
+
# and loses the snapshot's `release_date`. The auto-create +
|
| 115 |
+
# hub-stats path produces a properly-linked snapshot canonical
|
| 116 |
+
# instead. See `infer_family_parent_edge` in
|
| 117 |
+
# services/hub_stats.py for the family-version edge inference.
|
| 118 |
+
# When a snapshot canonical is already aliased (exact / normalized
|
| 119 |
+
# match wins before fuzzy), the resolver returns it directly.
|
| 120 |
]
|
| 121 |
|
| 122 |
# Strip just the `-Nk` budget tail, leaving `-thinking` intact. Used by
|
|
|
|
| 388 |
|
| 389 |
|
| 390 |
def _strip_openai_iso_date(value: str) -> list[str]:
|
| 391 |
+
"""For OpenAI-shaped values ending in an ISO-format date, return
|
| 392 |
+
progressively-truncated candidates that STILL retain at least one
|
| 393 |
+
date component. The bare-family candidate (everything stripped) is
|
| 394 |
+
intentionally omitted: collapsing a dated snapshot all the way to
|
| 395 |
+
its family pointer drops the per-snapshot identity and silently
|
| 396 |
+
loses the snapshot's `release_date`. The auto-create + hub-stats
|
| 397 |
+
path is the right home for that case — it creates a snapshot
|
| 398 |
+
canonical with a `variant axis=version` parent edge to the family.
|
| 399 |
+
|
| 400 |
+
When an INTERMEDIATE snapshot canonical is aliased in the registry
|
| 401 |
+
(e.g. `openai/gpt-5-2025-08`), this function still returns it as a
|
| 402 |
+
candidate so a more-specific raw value (`openai/gpt-5-2025-08-07`)
|
| 403 |
+
can resolve to the existing snapshot rather than auto-creating a
|
| 404 |
+
duplicate.
|
| 405 |
+
|
| 406 |
+
Examples (registry contents shape what hits — this just emits the
|
| 407 |
+
candidates that are tried in order):
|
| 408 |
+
openai/gpt-5-2025-08-07 → [openai/gpt-5-2025-08, openai/gpt-5-2025]
|
| 409 |
+
openai/o3-mini-2025-01-31 → [openai/o3-mini-2025-01, openai/o3-mini-2025]
|
| 410 |
+
openai/gpt-4o-mini-2024 → [] (year-only has no intermediate;
|
| 411 |
+
handled via auto-create path)
|
| 412 |
+
meta/llama-3-2024-04-18 → [] (not OpenAI-shaped)
|
| 413 |
"""
|
| 414 |
if not _is_openai_shaped(value):
|
| 415 |
return []
|
|
|
|
| 431 |
if _is_release_year(y) and 1 <= int(mo) <= 12 and 1 <= int(d) <= 31:
|
| 432 |
candidates.append(f"{prefix}-{y}-{mo}")
|
| 433 |
candidates.append(f"{prefix}-{y}")
|
|
|
|
| 434 |
return candidates
|
| 435 |
|
| 436 |
m = _ISO_DATE_MONTH_RE.match(value)
|
|
|
|
| 438 |
prefix, y, mo = m.groups()
|
| 439 |
if _is_release_year(y) and 1 <= int(mo) <= 12:
|
| 440 |
candidates.append(f"{prefix}-{y}")
|
|
|
|
| 441 |
return candidates
|
| 442 |
|
| 443 |
+
# Year-only case (`-YYYY`) intentionally produces no candidates: the
|
| 444 |
+
# only possible peel is to bare family, which the auto-create path
|
| 445 |
+
# owns. Returning empty falls through to no_match cleanly.
|
|
|
|
|
|
|
|
|
|
| 446 |
return candidates
|
| 447 |
|
| 448 |
|
src/eval_card_registry/services/hub_stats.py
CHANGED
|
@@ -151,6 +151,158 @@ def filter_useful_tags(raw_tags) -> list[str]:
|
|
| 151 |
return sorted(set(keep))
|
| 152 |
|
| 153 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
def extract_base_models(base_models) -> list[dict]:
|
| 155 |
"""Decode the `baseModels` struct into a list of typed parent edges.
|
| 156 |
Returns `[{id, relationship}, ...]` — caller resolves each id to our
|
|
@@ -295,14 +447,21 @@ class HubStatsClient:
|
|
| 295 |
try:
|
| 296 |
con = self._ensure_con()
|
| 297 |
use_local = self._ensure_local_table(con)
|
| 298 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 299 |
if use_local:
|
| 300 |
-
sql = f"SELECT * FROM hub_stats WHERE id = '{escaped}' LIMIT 1"
|
| 301 |
else:
|
| 302 |
sql = (
|
| 303 |
f"SELECT {QUERY_COLUMNS} "
|
| 304 |
f"FROM read_parquet('{self.parquet_url}') "
|
| 305 |
-
f"WHERE id = '{escaped}' LIMIT 1"
|
| 306 |
)
|
| 307 |
cursor = con.execute(sql)
|
| 308 |
cols = [d[0] for d in cursor.description]
|
|
@@ -330,6 +489,7 @@ def enrich_draft_from_row(
|
|
| 330 |
row: dict,
|
| 331 |
aliases_to_canonical: dict[str, str],
|
| 332 |
org_alias_map: dict[str, str],
|
|
|
|
| 333 |
) -> dict:
|
| 334 |
"""Convert one hub-stats row into a partial canonical_models dict
|
| 335 |
suitable for merging into an auto-created draft. Computes:
|
|
@@ -383,6 +543,24 @@ def enrich_draft_from_row(
|
|
| 383 |
if lineage_origin_org_id is None and edge["relationship"] != "variant":
|
| 384 |
if "/" in parent_canonical:
|
| 385 |
lineage_origin_org_id = parent_canonical.split("/", 1)[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 386 |
if parents:
|
| 387 |
out["parents"] = json.dumps(parents)
|
| 388 |
if lineage_origin_org_id:
|
|
|
|
| 151 |
return sorted(set(keep))
|
| 152 |
|
| 153 |
|
| 154 |
+
# ---------------------------------------------------------------------------
|
| 155 |
+
# Family-version parent inference
|
| 156 |
+
# ---------------------------------------------------------------------------
|
| 157 |
+
#
|
| 158 |
+
# Hub-stats `baseModels` records *upstream* lineage (finetune / quantized /
|
| 159 |
+
# merge / adapter), never the family-version relationship between a dated
|
| 160 |
+
# snapshot and its moving pointer canonical (`Olmo-3-1125-32B` → our
|
| 161 |
+
# `allenai/olmo-3-32b`). The pointer isn't an HF id β it only exists in our
|
| 162 |
+
# registry β so HF can't surface that edge. Without inference here, dated
|
| 163 |
+
# snapshots auto-create as orphaned canonicals: `release_date` lands fine
|
| 164 |
+
# but `parents`/`root_model_id` stay empty, root-collapse never fires, and
|
| 165 |
+
# the snapshot shows up as a separate model in consumers.
|
| 166 |
+
|
| 167 |
+
_INTERNAL_DATE_RE = re.compile(r"^(.+?)-(\d{4})-([^-].*)$")
|
| 168 |
+
_TRAILING_4DIGIT_RE = re.compile(r"^(.+)-(\d{4})$")
|
| 169 |
+
_TRAILING_6DIGIT_RE = re.compile(r"^(.+)-(\d{6})$")
|
| 170 |
+
_TRAILING_8DIGIT_RE = re.compile(r"^(.+)-(\d{8})$")
|
| 171 |
+
# ISO date patterns (anchored, full-string). Strict component widths
|
| 172 |
+
# stop us from peeling tokens that aren't dates (a 5-digit numeric tail
|
| 173 |
+
# won't match `\d{4}-\d{2}`).
|
| 174 |
+
_ISO_FULL_DATE_RE = re.compile(r"^(.+)-(\d{4})-(\d{2})-(\d{2})$")
|
| 175 |
+
_ISO_MONTH_DATE_RE = re.compile(r"^(.+)-(\d{4})-(\d{2})$")
|
| 176 |
+
_ISO_YEAR_DATE_RE = re.compile(r"^(.+)-(\d{4})$")
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
def _looks_like_mmdd(token: str) -> bool:
|
| 180 |
+
"""4-digit MMDD where MM β [01,12] and DD β [01,31]. Used to gate
|
| 181 |
+
snapshot-token stripping on shapes that actually look like dates,
|
| 182 |
+
avoiding false-positives on numeric size/version tokens like `8000`."""
|
| 183 |
+
if len(token) != 4 or not token.isdigit():
|
| 184 |
+
return False
|
| 185 |
+
mm, dd = int(token[:2]), int(token[2:])
|
| 186 |
+
return 1 <= mm <= 12 and 1 <= dd <= 31
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
def _looks_like_yyyymm(token: str) -> bool:
|
| 190 |
+
"""6-digit YYYYMM (year+month). Stepfun and several Chinese-lab
|
| 191 |
+
release tags use this convention, e.g. `step-2-16k-202411`."""
|
| 192 |
+
if len(token) != 6 or not token.isdigit():
|
| 193 |
+
return False
|
| 194 |
+
yyyy, mm = int(token[:4]), int(token[4:])
|
| 195 |
+
return 2015 <= yyyy <= 2035 and 1 <= mm <= 12
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
def _looks_like_yyyymmdd(token: str) -> bool:
|
| 199 |
+
if len(token) != 8 or not token.isdigit():
|
| 200 |
+
return False
|
| 201 |
+
yyyy, mm, dd = int(token[:4]), int(token[4:6]), int(token[6:])
|
| 202 |
+
return 2015 <= yyyy <= 2035 and 1 <= mm <= 12 and 1 <= dd <= 31
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
def _looks_like_release_year(token: str) -> bool:
|
| 206 |
+
if len(token) != 4 or not token.isdigit():
|
| 207 |
+
return False
|
| 208 |
+
return 2015 <= int(token) <= 2035
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
def infer_family_parent_edge(
    hf_id: str,
    aliases_to_canonical: dict[str, str],
    target_canonical: Optional[str] = None,
) -> Optional[dict]:
    """Infer a version-axis parent edge for a snapshot-shaped id.

    Strips a date-like token from `hf_id`, looks each stripped candidate
    up in `aliases_to_canonical` (keyed by `normalize(candidate)`), and
    returns `{"id": <canonical>, "relationship": "variant",
    "axis": "version"}` for the first hit. Returns None when the id has
    no snapshot shape or no stripped form matches a known alias.

    Patterns recognized, in the order candidates are tried (single-pass
    strip — does NOT compose with mode/quant suffix stripping):
      - internal MMDD token:  `Olmo-3-1125-32B` → `Olmo-3-32B`
                              `Olmo-3-1125-7B-Instruct` → `Olmo-3-7B-Instruct`
      - trailing ISO date ladder: `gpt-5-2025-08-07` →
                              `gpt-5-2025-08` → `gpt-5-2025` → `gpt-5`
      - trailing YYYYMMDD:    `claude-haiku-4-5-20251001` → `claude-haiku-4-5`
      - trailing YYYYMM:      `step-2-16k-202411` → `step-2-16k`
      - trailing MMDD token:  `kimi-k2-0905` → `kimi-k2`

    An edge is only produced when a candidate resolves through the alias
    index, so stripping alone never manufactures a parent. For compound
    mode+date inputs (`claude-4-5-thinking-20251001`) the candidate is
    the mode-suffixed form; if that is not aliased, the result is None.

    Args:
        hf_id: Raw model id to inspect.
        aliases_to_canonical: Normalized alias → canonical id index.
        target_canonical: Canonical id the edge would be attached to.
            When given, a candidate resolving to this same id is skipped
            so the entity never gains a parent edge to itself (the
            original author notes this matters when an HF id is aliased
            directly to its family pointer).

    Returns:
        The edge dict, or None when nothing could be inferred.
    """
    candidates: list[str] = []

    # Internal MMDD (`Olmo-3-1125-32B` shape). Tried first because an
    # internal-token strip is a more specific lookup target than a
    # trailing-token strip.
    m = _INTERNAL_DATE_RE.match(hf_id)
    if m and _looks_like_mmdd(m.group(2)):
        prefix, _, suffix = m.groups()
        candidates.append(f"{prefix}-{suffix}")

    # ISO ladder (full → month → year). The three regexes match mutually
    # exclusive tail shapes (`-YYYY-MM-DD` vs `-YYYY-MM` vs `-YYYY`), so
    # each input fires at most one branch.
    m = _ISO_FULL_DATE_RE.match(hf_id)
    if m:
        prefix, y, mo, d = m.groups()
        if (
            _looks_like_release_year(y)
            and 1 <= int(mo) <= 12
            and 1 <= int(d) <= 31
        ):
            candidates.append(f"{prefix}-{y}-{mo}")
            candidates.append(f"{prefix}-{y}")
            candidates.append(prefix)
    else:
        m = _ISO_MONTH_DATE_RE.match(hf_id)
        if m:
            prefix, y, mo = m.groups()
            if _looks_like_release_year(y) and 1 <= int(mo) <= 12:
                candidates.append(f"{prefix}-{y}")
                candidates.append(prefix)
        else:
            m = _ISO_YEAR_DATE_RE.match(hf_id)
            if m:
                prefix, y = m.groups()
                if _looks_like_release_year(y):
                    candidates.append(prefix)

    # Trailing YYYYMMDD (Anthropic/xAI/Tencent style).
    m = _TRAILING_8DIGIT_RE.match(hf_id)
    if m and _looks_like_yyyymmdd(m.group(2)):
        candidates.append(m.group(1))

    # Trailing YYYYMM (Stepfun and several Chinese-lab release tags).
    m = _TRAILING_6DIGIT_RE.match(hf_id)
    if m and _looks_like_yyyymm(m.group(2)):
        candidates.append(m.group(1))

    # Trailing 4-digit MMDD (Moonshot/Kimi, Google -exp tags).
    m = _TRAILING_4DIGIT_RE.match(hf_id)
    if m and _looks_like_mmdd(m.group(2)):
        candidates.append(m.group(1))

    # First candidate that resolves to a canonical other than the target
    # wins; unresolved and self-referencing candidates are skipped.
    for cand in candidates:
        canonical = aliases_to_canonical.get(normalize(cand))
        if not canonical:
            continue
        if target_canonical is not None and canonical == target_canonical:
            continue
        return {"id": canonical, "relationship": "variant", "axis": "version"}
    return None
|
| 304 |
+
|
| 305 |
+
|
| 306 |
def extract_base_models(base_models) -> list[dict]:
|
| 307 |
"""Decode the `baseModels` struct into a list of typed parent edges.
|
| 308 |
Returns `[{id, relationship}, ...]` — caller resolves each id to our
|
|
|
|
| 447 |
try:
|
| 448 |
con = self._ensure_con()
|
| 449 |
use_local = self._ensure_local_table(con)
|
| 450 |
+
# Case-insensitive match — HF stores ids with the upstream
|
| 451 |
+
# author's original casing (`allenai/Olmo-3-1125-32B`); EEE
|
| 452 |
+
# surfaces values in mixed conventions (some leaderboards
|
| 453 |
+
# lowercase, some preserve). An exact-case `=` filter
|
| 454 |
+
# silently misses any casing mismatch and the draft lands
|
| 455 |
+
# without enrichment metadata. LOWER() forces a match
|
| 456 |
+
# regardless of the surface form.
|
| 457 |
+
escaped = hf_id.lower().replace("'", "''")
|
| 458 |
if use_local:
|
| 459 |
+
sql = f"SELECT * FROM hub_stats WHERE LOWER(id) = '{escaped}' LIMIT 1"
|
| 460 |
else:
|
| 461 |
sql = (
|
| 462 |
f"SELECT {QUERY_COLUMNS} "
|
| 463 |
f"FROM read_parquet('{self.parquet_url}') "
|
| 464 |
+
f"WHERE LOWER(id) = '{escaped}' LIMIT 1"
|
| 465 |
)
|
| 466 |
cursor = con.execute(sql)
|
| 467 |
cols = [d[0] for d in cursor.description]
|
|
|
|
| 489 |
row: dict,
|
| 490 |
aliases_to_canonical: dict[str, str],
|
| 491 |
org_alias_map: dict[str, str],
|
| 492 |
+
target_canonical: Optional[str] = None,
|
| 493 |
) -> dict:
|
| 494 |
"""Convert one hub-stats row into a partial canonical_models dict
|
| 495 |
suitable for merging into an auto-created draft. Computes:
|
|
|
|
| 543 |
if lineage_origin_org_id is None and edge["relationship"] != "variant":
|
| 544 |
if "/" in parent_canonical:
|
| 545 |
lineage_origin_org_id = parent_canonical.split("/", 1)[0]
|
| 546 |
+
|
| 547 |
+
# Family-version inference: hub-stats `baseModels` only records
|
| 548 |
+
# upstream-lineage edges (finetune/quantized/merge/adapter), never
|
| 549 |
+
# the dated-snapshot → moving-pointer relationship that lives only
|
| 550 |
+
# in our registry. Without this, snapshots like `Olmo-3-1125-32B`
|
| 551 |
+
# auto-create as orphan canonicals — release_date lands but parents
|
| 552 |
+
# stays empty and root-collapse never fires.
|
| 553 |
+
hf_id = row.get("id")
|
| 554 |
+
if isinstance(hf_id, str) and not any(
|
| 555 |
+
p.get("relationship") == "variant" and p.get("axis") == "version"
|
| 556 |
+
for p in parents
|
| 557 |
+
):
|
| 558 |
+
version_edge = infer_family_parent_edge(
|
| 559 |
+
hf_id, aliases_to_canonical, target_canonical=target_canonical,
|
| 560 |
+
)
|
| 561 |
+
if version_edge is not None:
|
| 562 |
+
parents.append(version_edge)
|
| 563 |
+
|
| 564 |
if parents:
|
| 565 |
out["parents"] = json.dumps(parents)
|
| 566 |
if lineage_origin_org_id:
|
src/eval_card_registry/services/resolution_service.py
CHANGED
|
@@ -9,8 +9,11 @@ Responsibilities:
|
|
| 9 |
"""
|
| 10 |
from __future__ import annotations
|
| 11 |
|
|
|
|
| 12 |
import re
|
|
|
|
| 13 |
import uuid
|
|
|
|
| 14 |
from datetime import datetime, timezone
|
| 15 |
from typing import Optional
|
| 16 |
|
|
@@ -49,6 +52,32 @@ def _now() -> str:
|
|
| 49 |
return datetime.now(timezone.utc).isoformat()
|
| 50 |
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
def _build_alias_store(registry_store: RegistryStore) -> AliasStore:
|
| 53 |
"""Build an AliasStore from the registry's in-memory aliases table."""
|
| 54 |
aliases_df = registry_store.table("aliases")
|
|
@@ -60,13 +89,16 @@ def _build_canonical_store(registry_store: RegistryStore) -> CanonicalStore:
|
|
| 60 |
tables. Lets the bare resolver enrich its results with the same
|
| 61 |
metadata fields the HTTP API exposes — including benchmark
|
| 62 |
`family_key` / `category` (which need families_df + composites_df
|
| 63 |
-
to populate; otherwise they fall back to the benchmark's own id).
|
|
|
|
|
|
|
|
|
|
| 64 |
return CanonicalStore(
|
| 65 |
-
models_df=
|
| 66 |
-
benchmarks_df=
|
| 67 |
-
metrics_df=
|
| 68 |
-
harnesses_df=
|
| 69 |
-
orgs_df=
|
| 70 |
families_df=registry_store.table("canonical_families") if registry_store.has_table("canonical_families") else None,
|
| 71 |
composites_df=registry_store.table("canonical_composites") if registry_store.has_table("canonical_composites") else None,
|
| 72 |
)
|
|
@@ -182,18 +214,36 @@ class ResolutionService:
|
|
| 182 |
return result_dict
|
| 183 |
|
| 184 |
# Check if alias already exists (skip resolver on rerun=False).
|
| 185 |
-
#
|
| 186 |
-
#
|
| 187 |
-
#
|
| 188 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
if not rerun:
|
| 190 |
existing = queries.get_alias(self.store, raw_value, entity_type, source_config)
|
| 191 |
if existing:
|
| 192 |
resolver = self._get_resolver()
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
result_dict = _result_to_dict(enriched, created_new=False)
|
| 198 |
self._resolve_cache[cache_key] = result_dict
|
| 199 |
return result_dict
|
|
@@ -274,17 +324,33 @@ class ResolutionService:
|
|
| 274 |
if created_new:
|
| 275 |
self.invalidate_resolver()
|
| 276 |
|
| 277 |
-
# Build the enriched response
|
| 278 |
-
#
|
| 279 |
-
#
|
| 280 |
-
#
|
| 281 |
-
#
|
| 282 |
-
#
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 288 |
result_dict = _result_to_dict(enriched, created_new=created_new)
|
| 289 |
if created_new and result_dict.get("review_status") is None:
|
| 290 |
result_dict["review_status"] = "draft"
|
|
@@ -322,7 +388,7 @@ class ResolutionService:
|
|
| 322 |
# β `enrichment` is `{}` on lookup miss or any error.
|
| 323 |
enrichment: dict = {}
|
| 324 |
if entity_type == "model" and self._looks_like_hf_id(raw_value):
|
| 325 |
-
enrichment = self._lookup_hub_stats(raw_value) or {}
|
| 326 |
if entity_type == "model":
|
| 327 |
base.update({
|
| 328 |
"developer": None,
|
|
@@ -343,6 +409,16 @@ class ResolutionService:
|
|
| 343 |
for k, v in enrichment.items():
|
| 344 |
if v is not None:
|
| 345 |
base[k] = v
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 346 |
elif entity_type == "benchmark":
|
| 347 |
base.update({"description": None, "dataset_repo": None, "parent_benchmark_id": None, "tags": "[]"})
|
| 348 |
elif entity_type == "metric":
|
|
@@ -361,6 +437,37 @@ class ResolutionService:
|
|
| 361 |
queries.upsert_entity(self.store, table, base, buffered=True)
|
| 362 |
return candidate_id
|
| 363 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 364 |
@staticmethod
|
| 365 |
def _looks_like_hf_id(raw_value: str) -> bool:
|
| 366 |
"""HF id heuristic: contains a single `/` with non-empty parts on
|
|
@@ -372,12 +479,18 @@ class ResolutionService:
|
|
| 372 |
org, name = raw_value.split("/", 1)
|
| 373 |
return bool(org.strip()) and bool(name.strip())
|
| 374 |
|
| 375 |
-
def _lookup_hub_stats(
|
|
|
|
|
|
|
| 376 |
"""Query hub-stats live for `hf_id` and return a partial draft
|
| 377 |
dict (release_date, params_billions, parents, lineage_origin_org_id,
|
| 378 |
tags, metadata) ready to merge. Returns None on miss or any error.
|
| 379 |
Uses the `aliases` table to resolve baseModels parents to our
|
| 380 |
-
canonical ids, and `canonical_orgs` HF aliases to map authors.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 381 |
if not settings.hub_stats_lookup_enabled:
|
| 382 |
return None
|
| 383 |
try:
|
|
@@ -390,7 +503,10 @@ class ResolutionService:
|
|
| 390 |
from eval_card_registry.services import hub_stats as _hs
|
| 391 |
try:
|
| 392 |
aliases_to_canonical, org_alias_map = self._build_hub_stats_indices()
|
| 393 |
-
return _hs.enrich_draft_from_row(
|
|
|
|
|
|
|
|
|
|
| 394 |
except Exception:
|
| 395 |
return None
|
| 396 |
|
|
|
|
| 9 |
"""
|
| 10 |
from __future__ import annotations
|
| 11 |
|
| 12 |
+
import json
|
| 13 |
import re
|
| 14 |
+
import threading
|
| 15 |
import uuid
|
| 16 |
+
from dataclasses import replace as _dc_replace
|
| 17 |
from datetime import datetime, timezone
|
| 18 |
from typing import Optional
|
| 19 |
|
|
|
|
| 52 |
return datetime.now(timezone.utc).isoformat()
|
| 53 |
|
| 54 |
|
| 55 |
+
def _table_with_pending(registry_store: RegistryStore, name: str) -> "pd.DataFrame":
|
| 56 |
+
"""Return a table DataFrame with pending-buffer rows appended.
|
| 57 |
+
|
| 58 |
+
`_auto_create_entity` writes drafts with `buffered=True`, so they sit
|
| 59 |
+
in `store._pending[<table>]` until `flush_pending` runs at the end of
|
| 60 |
+
a sync. Without overlaying pending here, the resolver's
|
| 61 |
+
`CanonicalStore` snapshot can't see the just-created row, and
|
| 62 |
+
`build_result` for an auto-created entity returns null for every
|
| 63 |
+
metadata field that hub-stats just enriched.
|
| 64 |
+
|
| 65 |
+
Concat is safe because `upsert_entity` enforces id-uniqueness across
|
| 66 |
+
base + pending (existing rows go to in-place update; only genuinely
|
| 67 |
+
new ids land in pending), so no duplicate keys end up in the
|
| 68 |
+
CanonicalStore index.
|
| 69 |
+
"""
|
| 70 |
+
import pandas as pd
|
| 71 |
+
base_df = registry_store.table(name) if registry_store.has_table(name) else pd.DataFrame()
|
| 72 |
+
pending = getattr(registry_store, "_pending", {}).get(name, [])
|
| 73 |
+
if not pending:
|
| 74 |
+
return base_df
|
| 75 |
+
pending_df = pd.DataFrame(pending)
|
| 76 |
+
if base_df.empty:
|
| 77 |
+
return pending_df
|
| 78 |
+
return pd.concat([base_df, pending_df], ignore_index=True)
|
| 79 |
+
|
| 80 |
+
|
| 81 |
def _build_alias_store(registry_store: RegistryStore) -> AliasStore:
|
| 82 |
"""Build an AliasStore from the registry's in-memory aliases table."""
|
| 83 |
aliases_df = registry_store.table("aliases")
|
|
|
|
| 89 |
tables. Lets the bare resolver enrich its results with the same
|
| 90 |
metadata fields the HTTP API exposes — including benchmark
|
| 91 |
`family_key` / `category` (which need families_df + composites_df
|
| 92 |
+
to populate; otherwise they fall back to the benchmark's own id).
|
| 93 |
+
|
| 94 |
+
Pending-buffer rows are overlaid so the resolver sees auto-created
|
| 95 |
+
drafts before `flush_pending` runs. See `_table_with_pending`."""
|
| 96 |
return CanonicalStore(
|
| 97 |
+
models_df=_table_with_pending(registry_store, "canonical_models"),
|
| 98 |
+
benchmarks_df=_table_with_pending(registry_store, "canonical_benchmarks"),
|
| 99 |
+
metrics_df=_table_with_pending(registry_store, "canonical_metrics"),
|
| 100 |
+
harnesses_df=_table_with_pending(registry_store, "eval_harnesses"),
|
| 101 |
+
orgs_df=_table_with_pending(registry_store, "canonical_orgs") if registry_store.has_table("canonical_orgs") else None,
|
| 102 |
families_df=registry_store.table("canonical_families") if registry_store.has_table("canonical_families") else None,
|
| 103 |
composites_df=registry_store.table("canonical_composites") if registry_store.has_table("canonical_composites") else None,
|
| 104 |
)
|
|
|
|
| 214 |
return result_dict
|
| 215 |
|
| 216 |
# Check if alias already exists (skip resolver on rerun=False).
|
| 217 |
+
# Re-run the strategy chain so the response carries the correct
|
| 218 |
+
# `resolved_leaf_id` β the alias table only stores the
|
| 219 |
+
# root-collapsed `canonical_id`, so reconstructing the response
|
| 220 |
+
# via `build_result(root, ...)` would clobber the leaf to the
|
| 221 |
+
# root (model_metadata_fields can't recover leaf identity from
|
| 222 |
+
# a root row alone β there's no back-pointer). The strategy
|
| 223 |
+
# chain re-derives leaf cleanly; perf cost is one alias-index
|
| 224 |
+
# lookup since exact-match hits in O(1) for already-aliased
|
| 225 |
+
# values. Audit fields are overlaid from the alias entry so
|
| 226 |
+
# callers still see the original strategy/confidence.
|
| 227 |
if not rerun:
|
| 228 |
existing = queries.get_alias(self.store, raw_value, entity_type, source_config)
|
| 229 |
if existing:
|
| 230 |
resolver = self._get_resolver()
|
| 231 |
+
fresh = resolver.resolve(raw_value, entity_type, source_config)
|
| 232 |
+
if fresh.canonical_id == existing["canonical_id"]:
|
| 233 |
+
enriched = _dc_replace(
|
| 234 |
+
fresh,
|
| 235 |
+
strategy=existing["strategy"],
|
| 236 |
+
confidence=existing["confidence"],
|
| 237 |
+
)
|
| 238 |
+
else:
|
| 239 |
+
# Rare: registry restructure has moved the canonical
|
| 240 |
+
# for this raw_value since the alias was written.
|
| 241 |
+
# The alias entry is the source of truth for "what
|
| 242 |
+
# this raw resolved to" β accept the leaf clobber.
|
| 243 |
+
enriched = resolver.build_result(
|
| 244 |
+
raw_value, entity_type, source_config,
|
| 245 |
+
existing["canonical_id"], existing["strategy"], existing["confidence"],
|
| 246 |
+
)
|
| 247 |
result_dict = _result_to_dict(enriched, created_new=False)
|
| 248 |
self._resolve_cache[cache_key] = result_dict
|
| 249 |
return result_dict
|
|
|
|
| 324 |
if created_new:
|
| 325 |
self.invalidate_resolver()
|
| 326 |
|
| 327 |
+
# Build the enriched response. Two cases:
|
| 328 |
+
# 1. Match found — the original `result` already carries the
|
| 329 |
+
# correct canonical_id (root-collapsed), resolved_leaf_id
|
| 330 |
+
# (the matched leaf), parents, and metadata. Don't re-run
|
| 331 |
+
# `build_result` here: it would call `model_metadata_fields`
|
| 332 |
+
# with the ROOT id, which can't recover the leaf and ends
|
| 333 |
+
# up returning resolved_leaf_id = canonical_id. The alias
|
| 334 |
+
# write earlier doesn't change canonical_models β `result`
|
| 335 |
+
# stays accurate.
|
| 336 |
+
# 2. Auto-create — `result.canonical_id` was None, the new
|
| 337 |
+
# `canonical_id` came from `_auto_create_entity`. The new
|
| 338 |
+
# canonical IS the leaf (its parents may point at family
|
| 339 |
+
# via the inferred version-axis edge), so `build_result`
|
| 340 |
+
# with the new id correctly preserves leaf info via
|
| 341 |
+
# `model_metadata_fields`. The `invalidate_resolver()`
|
| 342 |
+
# above ensures the canonical_store snapshot sees the new
|
| 343 |
+
# row, but the entity may still sit in the pending-write
|
| 344 |
+
# buffer; on lookup miss the review_status falls back to
|
| 345 |
+
# None and we override to 'draft' below.
|
| 346 |
+
if created_new:
|
| 347 |
+
resolver = self._get_resolver()
|
| 348 |
+
enriched = resolver.build_result(
|
| 349 |
+
raw_value, entity_type, source_config,
|
| 350 |
+
canonical_id, strategy_used, result.confidence,
|
| 351 |
+
)
|
| 352 |
+
else:
|
| 353 |
+
enriched = result
|
| 354 |
result_dict = _result_to_dict(enriched, created_new=created_new)
|
| 355 |
if created_new and result_dict.get("review_status") is None:
|
| 356 |
result_dict["review_status"] = "draft"
|
|
|
|
| 388 |
# β `enrichment` is `{}` on lookup miss or any error.
|
| 389 |
enrichment: dict = {}
|
| 390 |
if entity_type == "model" and self._looks_like_hf_id(raw_value):
|
| 391 |
+
enrichment = self._lookup_hub_stats(raw_value, target_canonical=candidate_id) or {}
|
| 392 |
if entity_type == "model":
|
| 393 |
base.update({
|
| 394 |
"developer": None,
|
|
|
|
| 409 |
for k, v in enrichment.items():
|
| 410 |
if v is not None:
|
| 411 |
base[k] = v
|
| 412 |
+
# Family-version inference fallback: when hub-stats misses
|
| 413 |
+
# (parquet stale, lookup disabled, rate-limited, or row
|
| 414 |
+
# absent), the snapshot still has its shape β try to infer a
|
| 415 |
+
# version-axis parent from just the alias index. The
|
| 416 |
+
# inference is alias-lookup-only, so it never manufactures
|
| 417 |
+
# a false parent. Idempotent with the inference inside
|
| 418 |
+
# enrich_draft_from_row: only fires when no version-axis
|
| 419 |
+
# edge is already present.
|
| 420 |
+
if self._looks_like_hf_id(raw_value):
|
| 421 |
+
self._maybe_infer_family_parent(base, raw_value, candidate_id)
|
| 422 |
elif entity_type == "benchmark":
|
| 423 |
base.update({"description": None, "dataset_repo": None, "parent_benchmark_id": None, "tags": "[]"})
|
| 424 |
elif entity_type == "metric":
|
|
|
|
| 437 |
queries.upsert_entity(self.store, table, base, buffered=True)
|
| 438 |
return candidate_id
|
| 439 |
|
| 440 |
+
def _maybe_infer_family_parent(
|
| 441 |
+
self, base: dict, raw_value: str, candidate_id: str,
|
| 442 |
+
) -> None:
|
| 443 |
+
"""Mutate `base['parents']` to add a `{variant, axis: version}`
|
| 444 |
+
edge when the raw value's snapshot shape resolves to an existing
|
| 445 |
+
family canonical via the alias index. Runs independently of
|
| 446 |
+
hub-stats so brand-new releases not yet in the parquet still
|
| 447 |
+
get linked into the lineage graph."""
|
| 448 |
+
try:
|
| 449 |
+
existing = json.loads(base.get("parents") or "[]")
|
| 450 |
+
except (ValueError, TypeError):
|
| 451 |
+
existing = []
|
| 452 |
+
if any(
|
| 453 |
+
p.get("relationship") == "variant" and p.get("axis") == "version"
|
| 454 |
+
for p in existing
|
| 455 |
+
if isinstance(p, dict)
|
| 456 |
+
):
|
| 457 |
+
return
|
| 458 |
+
from eval_card_registry.services.hub_stats import infer_family_parent_edge
|
| 459 |
+
try:
|
| 460 |
+
aliases_to_canonical, _ = self._build_hub_stats_indices()
|
| 461 |
+
except Exception:
|
| 462 |
+
return
|
| 463 |
+
edge = infer_family_parent_edge(
|
| 464 |
+
raw_value, aliases_to_canonical, target_canonical=candidate_id,
|
| 465 |
+
)
|
| 466 |
+
if edge is None:
|
| 467 |
+
return
|
| 468 |
+
existing.append(edge)
|
| 469 |
+
base["parents"] = json.dumps(existing)
|
| 470 |
+
|
| 471 |
@staticmethod
|
| 472 |
def _looks_like_hf_id(raw_value: str) -> bool:
|
| 473 |
"""HF id heuristic: contains a single `/` with non-empty parts on
|
|
|
|
| 479 |
org, name = raw_value.split("/", 1)
|
| 480 |
return bool(org.strip()) and bool(name.strip())
|
| 481 |
|
| 482 |
+
def _lookup_hub_stats(
|
| 483 |
+
self, hf_id: str, target_canonical: Optional[str] = None,
|
| 484 |
+
) -> Optional[dict]:
|
| 485 |
"""Query hub-stats live for `hf_id` and return a partial draft
|
| 486 |
dict (release_date, params_billions, parents, lineage_origin_org_id,
|
| 487 |
tags, metadata) ready to merge. Returns None on miss or any error.
|
| 488 |
Uses the `aliases` table to resolve baseModels parents to our
|
| 489 |
+
canonical ids, and `canonical_orgs` HF aliases to map authors.
|
| 490 |
+
|
| 491 |
+
`target_canonical` is the candidate canonical id of the draft
|
| 492 |
+
being created β passed through to enrich_draft_from_row so the
|
| 493 |
+
family-version inference can suppress a self-edge."""
|
| 494 |
if not settings.hub_stats_lookup_enabled:
|
| 495 |
return None
|
| 496 |
try:
|
|
|
|
| 503 |
from eval_card_registry.services import hub_stats as _hs
|
| 504 |
try:
|
| 505 |
aliases_to_canonical, org_alias_map = self._build_hub_stats_indices()
|
| 506 |
+
return _hs.enrich_draft_from_row(
|
| 507 |
+
row, aliases_to_canonical, org_alias_map,
|
| 508 |
+
target_canonical=target_canonical,
|
| 509 |
+
)
|
| 510 |
except Exception:
|
| 511 |
return None
|
| 512 |
|