Spaces:
Running on Zero
Running on Zero
fix: canonicalize composite quest labels
Browse files
Co-authored-by: Codex <noreply@openai.com>
hackathon_advisor/quest_analysis.py
CHANGED
|
@@ -20,6 +20,7 @@ from hackathon_advisor.quest_taxonomy import (
|
|
| 20 |
QUESTS,
|
| 21 |
build_app_segment,
|
| 22 |
build_readme_segment,
|
|
|
|
| 23 |
normalize_match,
|
| 24 |
render_quest_prompt,
|
| 25 |
)
|
|
@@ -332,13 +333,18 @@ def _validate_project_matches(raw_matches: Any, project_id: str) -> list[dict[st
|
|
| 332 |
if not isinstance(raw_match, dict):
|
| 333 |
raise QuestAnalysisError(f"quest matches for {project_id} must be objects")
|
| 334 |
try:
|
| 335 |
-
|
| 336 |
except ValueError as error:
|
| 337 |
raise QuestAnalysisError(f"quest match for {project_id}: {error}") from error
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 342 |
return matches
|
| 343 |
|
| 344 |
|
|
|
|
| 20 |
QUESTS,
|
| 21 |
build_app_segment,
|
| 22 |
build_readme_segment,
|
| 23 |
+
canonical_quest_ids,
|
| 24 |
normalize_match,
|
| 25 |
render_quest_prompt,
|
| 26 |
)
|
|
|
|
| 333 |
if not isinstance(raw_match, dict):
|
| 334 |
raise QuestAnalysisError(f"quest matches for {project_id} must be objects")
|
| 335 |
try:
|
| 336 |
+
quest_ids = canonical_quest_ids(raw_match.get("quest"))
|
| 337 |
except ValueError as error:
|
| 338 |
raise QuestAnalysisError(f"quest match for {project_id}: {error}") from error
|
| 339 |
+
for quest_id in quest_ids:
|
| 340 |
+
try:
|
| 341 |
+
match = normalize_match({**raw_match, "quest": quest_id})
|
| 342 |
+
except ValueError as error:
|
| 343 |
+
raise QuestAnalysisError(f"quest match for {project_id}: {error}") from error
|
| 344 |
+
if match["quest"] in seen:
|
| 345 |
+
raise QuestAnalysisError(f"duplicate quest for {project_id}: {match['quest']}")
|
| 346 |
+
seen.add(match["quest"])
|
| 347 |
+
matches.append(match)
|
| 348 |
return matches
|
| 349 |
|
| 350 |
|
hackathon_advisor/quest_taxonomy.py
CHANGED
|
@@ -127,6 +127,33 @@ QUESTS: tuple[str, ...] = tuple(profile["id"] for profile in QUEST_PROFILES)
|
|
| 127 |
QUEST_PROFILE_BY_ID: dict[str, dict[str, str]] = {profile["id"]: profile for profile in QUEST_PROFILES}
|
| 128 |
|
| 129 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
def quest_profiles() -> list[dict[str, str]]:
|
| 131 |
return [
|
| 132 |
{"id": profile["id"], "label": profile["label"], "description": profile["description"]}
|
|
@@ -142,6 +169,9 @@ def canonical_quest_id(raw_quest: Any) -> str:
|
|
| 142 |
quest = " ".join(str(raw_quest or "").split())
|
| 143 |
if quest in QUEST_PROFILE_BY_ID:
|
| 144 |
return quest
|
|
|
|
|
|
|
|
|
|
| 145 |
folded = quest.casefold()
|
| 146 |
for known in QUESTS:
|
| 147 |
known_folded = known.casefold()
|
|
@@ -152,6 +182,25 @@ def canonical_quest_id(raw_quest: Any) -> str:
|
|
| 152 |
raise ValueError(f"unknown quest: {quest!r}")
|
| 153 |
|
| 154 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
def _clip(text: str, limit: int) -> str:
|
| 156 |
cleaned = (text or "").strip()
|
| 157 |
if len(cleaned) <= limit:
|
|
|
|
| 127 |
QUEST_PROFILE_BY_ID: dict[str, dict[str, str]] = {profile["id"]: profile for profile in QUEST_PROFILES}
|
| 128 |
|
| 129 |
|
| 130 |
+
def _quest_key(raw: Any) -> str:
|
| 131 |
+
text = " ".join(str(raw or "").replace("&", " and ").casefold().split())
|
| 132 |
+
return re.sub(r"[^a-z0-9]+", " ", text).strip()
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
# Maps a normalised quest key (as produced by _quest_key) to a canonical quest id.
_QUEST_ALIASES: dict[str, str] = {}
for _profile in QUEST_PROFILES:
    # Every profile is reachable through both its id and its display label.
    _QUEST_ALIASES[_quest_key(_profile["id"])] = _profile["id"]
    _QUEST_ALIASES[_quest_key(_profile["label"])] = _profile["id"]
# Hand-written alternative spellings. They are applied after the profile
# entries, so on a key collision the alias listed here wins.
_QUEST_ALIASES.update(
    {
        _quest_key("Best MiniCPM Build"): "OpenBMB",
        _quest_key("MiniCPM Build"): "OpenBMB",
        _quest_key("MiniCPM"): "OpenBMB",
        _quest_key("OpenBMB / MiniCPM"): "OpenBMB",
        _quest_key("Small model <=4B"): "Tiny Titan",
        _quest_key("Small model under 4B"): "Tiny Titan",
        _quest_key("Shareable output"): "Sharing is Caring",
        _quest_key("Custom UI"): "Off-Brand",
        _quest_key("Custom interface"): "Off-Brand",
        _quest_key("Local first"): "Off the Grid",
        _quest_key("Fine tuned"): "Well-Tuned",
        _quest_key("Fine tune"): "Well-Tuned",
    }
)
|
| 155 |
+
|
| 156 |
+
|
| 157 |
def quest_profiles() -> list[dict[str, str]]:
|
| 158 |
return [
|
| 159 |
{"id": profile["id"], "label": profile["label"], "description": profile["description"]}
|
|
|
|
| 169 |
quest = " ".join(str(raw_quest or "").split())
|
| 170 |
if quest in QUEST_PROFILE_BY_ID:
|
| 171 |
return quest
|
| 172 |
+
alias = _QUEST_ALIASES.get(_quest_key(quest))
|
| 173 |
+
if alias:
|
| 174 |
+
return alias
|
| 175 |
folded = quest.casefold()
|
| 176 |
for known in QUESTS:
|
| 177 |
known_folded = known.casefold()
|
|
|
|
| 182 |
raise ValueError(f"unknown quest: {quest!r}")
|
| 183 |
|
| 184 |
|
| 185 |
+
def canonical_quest_ids(raw_quest: Any) -> tuple[str, ...]:
    """Resolve a quest label, possibly a "/"-separated composite, to canonical ids.

    The whitespace-normalised label is first resolved as a single quest via
    ``canonical_quest_id``. Only when that fails is it split on "/" and each
    part resolved on its own; duplicates are dropped, keeping first-seen order.

    Args:
        raw_quest: Raw quest value; it is stringified, and falsy values are
            treated as the empty string.

    Returns:
        A one-element tuple when the whole label resolves, otherwise one
        canonical id per distinct composite part.

    Raises:
        ValueError: If the label is unknown and not a composite (the error
            from ``canonical_quest_id`` propagates unchanged), or if any
            composite part is unknown.
    """
    quest = " ".join(str(raw_quest or "").split())
    try:
        return (canonical_quest_id(quest),)
    except ValueError:
        # `quest` has no leading/trailing whitespace and the pattern consumes
        # whitespace around each "/", so parts need no further stripping.
        parts = [part for part in re.split(r"\s*/\s*", quest) if part]
        if len(parts) <= 1:
            # Not a composite label: surface the original lookup error.
            raise

    canonical: list[str] = []
    for part in parts:
        try:
            quest_id = canonical_quest_id(part)
        except ValueError as error:
            raise ValueError(f"unknown quest in composite {quest!r}: {part!r}") from error
        if quest_id not in canonical:
            canonical.append(quest_id)
    return tuple(canonical)
|
| 202 |
+
|
| 203 |
+
|
| 204 |
def _clip(text: str, limit: int) -> str:
|
| 205 |
cleaned = (text or "").strip()
|
| 206 |
if len(cleaned) <= limit:
|
tests/test_dashboard.py
CHANGED
|
@@ -196,6 +196,62 @@ def test_quest_analysis_validation_canonicalizes_known_label_suffixes() -> None:
|
|
| 196 |
assert validated.matches_by_project[projects[0].id][0]["quest"] == "Off the Grid"
|
| 197 |
|
| 198 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
def test_quest_json_extractor_accepts_fenced_object() -> None:
|
| 200 |
payload = _extract_json_object('```json\n{"projects":[]}\n```')
|
| 201 |
|
|
|
|
| 196 |
assert validated.matches_by_project[projects[0].id][0]["quest"] == "Off the Grid"
|
| 197 |
|
| 198 |
|
| 199 |
+
def test_quest_analysis_validation_expands_known_composite_quest_labels() -> None:
    """Composite "A / B" quest labels expand into one canonical match per quest."""
    projects = fake_projects(1)
    raw = {
        "projects": [
            {
                "project_id": projects[0].id,
                "matches": [
                    {
                        "quest": "Best MiniCPM Build / Tiny Titan",
                        "confidence": 0.84,
                        "evidence": "MiniCPM5-1B model",
                        "source": "app_file",
                    },
                    {
                        "quest": "Off-Brand / Sharing is Caring",
                        "confidence": 0.72,
                        "evidence": "custom UI exports a card",
                        "source": "readme",
                    },
                ],
            }
        ]
    }

    validated = validate_quest_analysis_payload(raw, projects, source="fake")

    # Expansion keeps the order of the raw matches and of the parts within each.
    quests = [match["quest"] for match in validated.matches_by_project[projects[0].id]]
    assert quests == ["OpenBMB", "Tiny Titan", "Off-Brand", "Sharing is Caring"]
|
| 227 |
+
|
| 228 |
+
|
| 229 |
+
def test_quest_analysis_validation_rejects_unknown_composite_quest_labels() -> None:
    """A composite label with an unknown part is rejected, not partially accepted."""
    projects = fake_projects(1)
    raw = {
        "projects": [
            {
                "project_id": projects[0].id,
                "matches": [
                    {
                        "quest": "Mystery Award / Tiny Titan",
                        "confidence": 0.84,
                        "evidence": "tiny model",
                        "source": "app_file",
                    }
                ],
            }
        ]
    }

    try:
        validate_quest_analysis_payload(raw, projects, source="fake")
    except QuestAnalysisError as error:
        message = str(error)
        assert "unknown quest in composite" in message
        # The offending part is reported on its own (repr-quoted), not only
        # as a substring of the full composite label.
        assert "'Mystery Award'" in message
    else:
        raise AssertionError("unknown composite quest labels must be rejected")
|
| 253 |
+
|
| 254 |
+
|
| 255 |
def test_quest_json_extractor_accepts_fenced_object() -> None:
|
| 256 |
payload = _extract_json_object('```json\n{"projects":[]}\n```')
|
| 257 |
|