Spaces:
Sleeping
Sleeping
| """Pure-function Jaccard top-k retrieval over tag sets. | |
| No embeddings, no torch, no tokenizer. Deterministic — same inputs | |
| produce the same ranking, same top-k, same tie-break. | |
| A conservative Jaccard threshold (0.3) limits retrieval noise when | |
| broad pre-seeds would otherwise match every scenario. | |
| """ | |
| from __future__ import annotations | |
| from collections.abc import Iterable | |
| from skill_library.entries import ( | |
| DriftAdaptationCard, | |
| PlaybookEntry, | |
| RetrievalResult, | |
| ) | |
# Minimum Jaccard score an entry must reach to be retrieved. Kept
# conservative so broad tag sets do not match every query; used as the
# default ``min_overlap`` of ``top_k_playbook``.
JACCARD_MIN: float = 0.3
def jaccard(a: frozenset[str], b: frozenset[str]) -> float:
    """Return the Jaccard similarity ``|a & b| / |a | b|`` of two sets.

    Two empty sets are treated as identical (1.0); an empty set compared
    with a non-empty one scores 0.0.
    """
    if a and b:
        shared = a & b
        combined = a | b
        return len(shared) / len(combined)
    # At least one side is empty: identical only when both are.
    return 0.0 if (a or b) else 1.0
def top_k_playbook(
    query_tags: frozenset[str],
    entries: Iterable[PlaybookEntry],
    k: int = 3,
    *,
    min_overlap: float = JACCARD_MIN,
) -> tuple[PlaybookEntry, ...]:
    """Return up to ``k`` playbook entries ranked by Jaccard overlap.

    Each entry's ``tag_set`` is scored against ``query_tags`` with
    ``jaccard``; entries scoring below ``min_overlap`` are dropped.
    Survivors are ordered by descending score, then descending
    ``avg_speedup``, then ascending ``before_snippet`` so the result is
    stable across runs. Entries that tie on all three keys keep their
    input order because ``list.sort`` is stable.

    Args:
        query_tags: Tags describing the current query.
        entries: Candidate playbook entries; iterated exactly once.
        k: Maximum number of entries to return. Zero or a negative value
            yields an empty tuple.
        min_overlap: Inclusive lower bound on the Jaccard score.

    Returns:
        The best-scoring entries, best first; may hold fewer than ``k``.
    """
    scored = []
    for entry in entries:
        score = jaccard(query_tags, entry.tag_set)
        if score >= min_overlap:
            scored.append((score, entry))
    scored.sort(key=lambda t: (-t[0], -t[1].avg_speedup, t[1].before_snippet))
    # Clamp k: a negative slice bound counts from the end, so ``scored[:-1]``
    # would return everything except the worst match instead of nothing.
    return tuple(e for _, e in scored[: max(k, 0)])
def top_k_drift_cards(
    drift_kind: str | None,
    cards: Iterable[DriftAdaptationCard],
    k: int = 1,
) -> tuple[DriftAdaptationCard, ...]:
    """Return up to ``k`` cards whose ``drift_kind`` equals the query's.

    Matching cards are ordered by descending ``success_rate``; cards with
    equal ``success_rate`` keep their input order.

    Args:
        drift_kind: Drift kind to match exactly. ``None`` means there is
            nothing to look up, so an empty tuple is returned and
            ``cards`` is not iterated.
        cards: Candidate drift-adaptation cards.
        k: Maximum number of cards to return. Zero or a negative value
            yields an empty tuple.

    Returns:
        The matching cards, highest ``success_rate`` first.
    """
    if drift_kind is None:
        return ()
    matches = [c for c in cards if c.drift_kind == drift_kind]
    # Every match shares the same drift_kind, so it cannot serve as a
    # tie-break; the stable sort alone decides the order of equal rates.
    matches.sort(key=lambda c: -c.success_rate)
    # Clamp k: a negative slice bound counts from the end, so
    # ``matches[:-1]`` would return all but the last card.
    return tuple(matches[: max(k, 0)])
def retrieve(
    query_tags: frozenset[str],
    drift_kind: str | None,
    playbook: Iterable[PlaybookEntry],
    drift_cards: Iterable[DriftAdaptationCard],
    *,
    playbook_k: int = 3,
    drift_k: int = 1,
) -> RetrievalResult:
    """Run both retrievals and bundle them into one ``RetrievalResult``.

    Playbook entries are ranked against ``query_tags`` (at most
    ``playbook_k`` kept); drift cards are matched on ``drift_kind`` (at
    most ``drift_k`` kept).
    """
    ranked_entries = top_k_playbook(query_tags, playbook, k=playbook_k)
    ranked_cards = top_k_drift_cards(drift_kind, drift_cards, k=drift_k)
    return RetrievalResult(playbook=ranked_entries, drift_cards=ranked_cards)
# Public API: names exported by ``from <module> import *``.
__all__ = [
    "JACCARD_MIN",
    "jaccard",
    "retrieve",
    "top_k_drift_cards",
    "top_k_playbook",
]