Spaces:

build-small-hackathon
/

hackathon-advisor

Running on Zero

App Files Files Community

JacobLinCool Codex committed on Jun 8

Commit

ca84660

verified ·

1 Parent(s): 1a39d73

fix: canonicalize composite quest labels

Browse files

Co-authored-by: Codex <noreply@openai.com>

Files changed (3) hide show

hackathon_advisor/quest_analysis.py +11 -5
hackathon_advisor/quest_taxonomy.py +49 -0
tests/test_dashboard.py +56 -0

hackathon_advisor/quest_analysis.py CHANGED Viewed

@@ -20,6 +20,7 @@ from hackathon_advisor.quest_taxonomy import (
     QUESTS,
     build_app_segment,
     build_readme_segment,
     normalize_match,
     render_quest_prompt,
 )
@@ -332,13 +333,18 @@ def _validate_project_matches(raw_matches: Any, project_id: str) -> list[dict[st
         if not isinstance(raw_match, dict):
             raise QuestAnalysisError(f"quest matches for {project_id} must be objects")
         try:
-            match = normalize_match(raw_match)
         except ValueError as error:
             raise QuestAnalysisError(f"quest match for {project_id}: {error}") from error
-        if match["quest"] in seen:
-            raise QuestAnalysisError(f"duplicate quest for {project_id}: {match['quest']}")
-        seen.add(match["quest"])
-        matches.append(match)
     return matches

     QUESTS,
     build_app_segment,
     build_readme_segment,
+    canonical_quest_ids,
     normalize_match,
     render_quest_prompt,
 )
         if not isinstance(raw_match, dict):
             raise QuestAnalysisError(f"quest matches for {project_id} must be objects")
         try:
+            quest_ids = canonical_quest_ids(raw_match.get("quest"))
         except ValueError as error:
             raise QuestAnalysisError(f"quest match for {project_id}: {error}") from error
+        for quest_id in quest_ids:
+            try:
+                match = normalize_match({**raw_match, "quest": quest_id})
+            except ValueError as error:
+                raise QuestAnalysisError(f"quest match for {project_id}: {error}") from error
+            if match["quest"] in seen:
+                raise QuestAnalysisError(f"duplicate quest for {project_id}: {match['quest']}")
+            seen.add(match["quest"])
+            matches.append(match)
     return matches

hackathon_advisor/quest_taxonomy.py CHANGED Viewed

@@ -127,6 +127,33 @@ QUESTS: tuple[str, ...] = tuple(profile["id"] for profile in QUEST_PROFILES)
 QUEST_PROFILE_BY_ID: dict[str, dict[str, str]] = {profile["id"]: profile for profile in QUEST_PROFILES}
 def quest_profiles() -> list[dict[str, str]]:
     return [
         {"id": profile["id"], "label": profile["label"], "description": profile["description"]}
@@ -142,6 +169,9 @@ def canonical_quest_id(raw_quest: Any) -> str:
     quest = " ".join(str(raw_quest or "").split())
     if quest in QUEST_PROFILE_BY_ID:
         return quest
     folded = quest.casefold()
     for known in QUESTS:
         known_folded = known.casefold()
@@ -152,6 +182,25 @@ def canonical_quest_id(raw_quest: Any) -> str:
     raise ValueError(f"unknown quest: {quest!r}")
 def _clip(text: str, limit: int) -> str:
     cleaned = (text or "").strip()
     if len(cleaned) <= limit:

 QUEST_PROFILE_BY_ID: dict[str, dict[str, str]] = {profile["id"]: profile for profile in QUEST_PROFILES}
+def _quest_key(raw: Any) -> str:  # Normalize a quest name into a lookup key made only of a-z, 0-9 and single spaces.
+    text = " ".join(str(raw or "").replace("&", " and ").casefold().split())  # falsy input becomes ""; "&" is spelled out as "and"; case is folded and whitespace runs collapse
+    return re.sub(r"[^a-z0-9]+", " ", text).strip()  # each run of any other character (punctuation, symbols, non-ASCII) becomes one space, then the ends are trimmed
+_QUEST_ALIASES: dict[str, str] = {}  # maps a _quest_key()-normalized name to a quest profile id
+for _profile in QUEST_PROFILES:
+    _QUEST_ALIASES[_quest_key(_profile["id"])] = _profile["id"]  # the id itself, in normalized form
+    _QUEST_ALIASES[_quest_key(_profile["label"])] = _profile["id"]  # the profile's label, in normalized form
+_QUEST_ALIASES.update(  # hand-written alternative names; on a key collision these override the entries built above
+    {
+        _quest_key("Best MiniCPM Build"): "OpenBMB",
+        _quest_key("MiniCPM Build"): "OpenBMB",
+        _quest_key("MiniCPM"): "OpenBMB",
+        _quest_key("OpenBMB / MiniCPM"): "OpenBMB",  # the "/" is dropped by _quest_key, so this key is "openbmb minicpm"
+        _quest_key("Small model <=4B"): "Tiny Titan",
+        _quest_key("Small model under 4B"): "Tiny Titan",
+        _quest_key("Shareable output"): "Sharing is Caring",
+        _quest_key("Custom UI"): "Off-Brand",
+        _quest_key("Custom interface"): "Off-Brand",
+        _quest_key("Local first"): "Off the Grid",
+        _quest_key("Fine tuned"): "Well-Tuned",
+        _quest_key("Fine tune"): "Well-Tuned",
+    }
+)
 def quest_profiles() -> list[dict[str, str]]:
     return [
         {"id": profile["id"], "label": profile["label"], "description": profile["description"]}
     quest = " ".join(str(raw_quest or "").split())
     if quest in QUEST_PROFILE_BY_ID:
         return quest
+    alias = _QUEST_ALIASES.get(_quest_key(quest))
+    if alias:
+        return alias
     folded = quest.casefold()
     for known in QUESTS:
         known_folded = known.casefold()
     raise ValueError(f"unknown quest: {quest!r}")
+def canonical_quest_ids(raw_quest: Any) -> tuple[str, ...]:  # Resolve a quest label, possibly a "/"-separated composite, to a tuple of canonical quest ids; raises ValueError if it cannot be resolved.
+    quest = " ".join(str(raw_quest or "").split())  # falsy input becomes ""; whitespace runs collapse to single spaces
+    try:
+        return (canonical_quest_id(quest),)  # the whole label is tried first, so a label resolvable as-is is never split
+    except ValueError as original_error:
+        parts = [part.strip() for part in re.split(r"\s*/\s*", quest) if part.strip()]  # split on "/" and drop empty pieces
+        if len(parts) <= 1:
+            raise original_error  # not a composite: surface the single-label error unchanged
+    canonical: list[str] = []  # resolved ids in first-seen order
+    for part in parts:
+        try:
+            quest_id = canonical_quest_id(part)
+        except ValueError as error:
+            raise ValueError(f"unknown quest in composite {quest!r}: {part!r}") from error  # one unknown part rejects the whole composite
+        if quest_id not in canonical:  # parts that resolve to the same id are kept once
+            canonical.append(quest_id)
+    return tuple(canonical)
 def _clip(text: str, limit: int) -> str:
     cleaned = (text or "").strip()
     if len(cleaned) <= limit:

tests/test_dashboard.py CHANGED Viewed

@@ -196,6 +196,62 @@ def test_quest_analysis_validation_canonicalizes_known_label_suffixes() -> None:
     assert validated.matches_by_project[projects[0].id][0]["quest"] == "Off the Grid"
 def test_quest_json_extractor_accepts_fenced_object() -> None:
     payload = _extract_json_object('```json\n{"projects":[]}\n```')

     assert validated.matches_by_project[projects[0].id][0]["quest"] == "Off the Grid"
+def test_quest_analysis_validation_expands_known_composite_quest_labels() -> None:  # Each "A / B" quest label should be validated into one match per component quest.
+    projects = fake_projects(1)
+    raw = {
+        "projects": [
+            {
+                "project_id": projects[0].id,
+                "matches": [
+                    {
+                        "quest": "Best MiniCPM Build / Tiny Titan",  # first part is expected to resolve to "OpenBMB" (see final assert)
+                        "confidence": 0.84,
+                        "evidence": "MiniCPM5-1B model",
+                        "source": "app_file",
+                    },
+                    {
+                        "quest": "Off-Brand / Sharing is Caring",
+                        "confidence": 0.72,
+                        "evidence": "custom UI exports a card",
+                        "source": "readme",
+                    },
+                ],
+            }
+        ]
+    }
+    validated = validate_quest_analysis_payload(raw, projects, source="fake")
+    quests = [match["quest"] for match in validated.matches_by_project[projects[0].id]]
+    assert quests == ["OpenBMB", "Tiny Titan", "Off-Brand", "Sharing is Caring"]  # two composite inputs yield four matches, in input order
+def test_quest_analysis_validation_rejects_unknown_composite_quest_labels() -> None:  # A composite label with an unrecognized component must raise QuestAnalysisError.
+    projects = fake_projects(1)
+    raw = {
+        "projects": [
+            {
+                "project_id": projects[0].id,
+                "matches": [
+                    {
+                        "quest": "Mystery Award / Tiny Titan",  # "Mystery Award" is the component expected to be rejected
+                        "confidence": 0.84,
+                        "evidence": "tiny model",
+                        "source": "app_file",
+                    }
+                ],
+            }
+        ]
+    }
+    try:
+        validate_quest_analysis_payload(raw, projects, source="fake")
+    except QuestAnalysisError as error:
+        assert "unknown quest in composite" in str(error)  # the composite-specific message must appear in the raised error
+    else:
+        raise AssertionError("unknown composite quest labels must be rejected")  # reached only if validation raised nothing
 def test_quest_json_extractor_accepts_fenced_object() -> None:
     payload = _extract_json_object('```json\n{"projects":[]}\n```')