JacobLinCool Codex committed on
Commit
ca84660
·
verified ·
1 Parent(s): 1a39d73

fix: canonicalize composite quest labels

Browse files

Co-authored-by: Codex <noreply@openai.com>

hackathon_advisor/quest_analysis.py CHANGED
@@ -20,6 +20,7 @@ from hackathon_advisor.quest_taxonomy import (
20
  QUESTS,
21
  build_app_segment,
22
  build_readme_segment,
 
23
  normalize_match,
24
  render_quest_prompt,
25
  )
@@ -332,13 +333,18 @@ def _validate_project_matches(raw_matches: Any, project_id: str) -> list[dict[st
332
  if not isinstance(raw_match, dict):
333
  raise QuestAnalysisError(f"quest matches for {project_id} must be objects")
334
  try:
335
- match = normalize_match(raw_match)
336
  except ValueError as error:
337
  raise QuestAnalysisError(f"quest match for {project_id}: {error}") from error
338
- if match["quest"] in seen:
339
- raise QuestAnalysisError(f"duplicate quest for {project_id}: {match['quest']}")
340
- seen.add(match["quest"])
341
- matches.append(match)
 
 
 
 
 
342
  return matches
343
 
344
 
 
20
  QUESTS,
21
  build_app_segment,
22
  build_readme_segment,
23
+ canonical_quest_ids,
24
  normalize_match,
25
  render_quest_prompt,
26
  )
 
333
  if not isinstance(raw_match, dict):
334
  raise QuestAnalysisError(f"quest matches for {project_id} must be objects")
335
  try:
336
+ quest_ids = canonical_quest_ids(raw_match.get("quest"))
337
  except ValueError as error:
338
  raise QuestAnalysisError(f"quest match for {project_id}: {error}") from error
339
+ for quest_id in quest_ids:
340
+ try:
341
+ match = normalize_match({**raw_match, "quest": quest_id})
342
+ except ValueError as error:
343
+ raise QuestAnalysisError(f"quest match for {project_id}: {error}") from error
344
+ if match["quest"] in seen:
345
+ raise QuestAnalysisError(f"duplicate quest for {project_id}: {match['quest']}")
346
+ seen.add(match["quest"])
347
+ matches.append(match)
348
  return matches
349
 
350
 
hackathon_advisor/quest_taxonomy.py CHANGED
@@ -127,6 +127,33 @@ QUESTS: tuple[str, ...] = tuple(profile["id"] for profile in QUEST_PROFILES)
127
  QUEST_PROFILE_BY_ID: dict[str, dict[str, str]] = {profile["id"]: profile for profile in QUEST_PROFILES}
128
 
129
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  def quest_profiles() -> list[dict[str, str]]:
131
  return [
132
  {"id": profile["id"], "label": profile["label"], "description": profile["description"]}
@@ -142,6 +169,9 @@ def canonical_quest_id(raw_quest: Any) -> str:
142
  quest = " ".join(str(raw_quest or "").split())
143
  if quest in QUEST_PROFILE_BY_ID:
144
  return quest
 
 
 
145
  folded = quest.casefold()
146
  for known in QUESTS:
147
  known_folded = known.casefold()
@@ -152,6 +182,25 @@ def canonical_quest_id(raw_quest: Any) -> str:
152
  raise ValueError(f"unknown quest: {quest!r}")
153
 
154
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  def _clip(text: str, limit: int) -> str:
156
  cleaned = (text or "").strip()
157
  if len(cleaned) <= limit:
 
127
  QUEST_PROFILE_BY_ID: dict[str, dict[str, str]] = {profile["id"]: profile for profile in QUEST_PROFILES}
128
 
129
 
130
+ def _quest_key(raw: Any) -> str:
131
+ text = " ".join(str(raw or "").replace("&", " and ").casefold().split())
132
+ return re.sub(r"[^a-z0-9]+", " ", text).strip()
133
+
134
+
135
# Maps a normalized quest key (as produced by ``_quest_key``) to a quest id.
# Each profile is reachable through both its id and its label. Entries are
# inserted profile by profile (id first, then label), so on a key collision
# the later entry wins. The comprehension keeps its loop variables out of
# the module namespace.
_QUEST_ALIASES: dict[str, str] = {
    _quest_key(name): profile["id"]
    for profile in QUEST_PROFILES
    for name in (profile["id"], profile["label"])
}
# Hand-maintained alternative spellings. Applied last, so they take
# precedence over any profile-derived key that normalizes to the same text.
_QUEST_ALIASES.update(
    {
        _quest_key("Best MiniCPM Build"): "OpenBMB",
        _quest_key("MiniCPM Build"): "OpenBMB",
        _quest_key("MiniCPM"): "OpenBMB",
        _quest_key("OpenBMB / MiniCPM"): "OpenBMB",
        _quest_key("Small model <=4B"): "Tiny Titan",
        _quest_key("Small model under 4B"): "Tiny Titan",
        _quest_key("Shareable output"): "Sharing is Caring",
        _quest_key("Custom UI"): "Off-Brand",
        _quest_key("Custom interface"): "Off-Brand",
        _quest_key("Local first"): "Off the Grid",
        _quest_key("Fine tuned"): "Well-Tuned",
        _quest_key("Fine tune"): "Well-Tuned",
    }
)
155
+
156
+
157
  def quest_profiles() -> list[dict[str, str]]:
158
  return [
159
  {"id": profile["id"], "label": profile["label"], "description": profile["description"]}
 
169
  quest = " ".join(str(raw_quest or "").split())
170
  if quest in QUEST_PROFILE_BY_ID:
171
  return quest
172
+ alias = _QUEST_ALIASES.get(_quest_key(quest))
173
+ if alias:
174
+ return alias
175
  folded = quest.casefold()
176
  for known in QUESTS:
177
  known_folded = known.casefold()
 
182
  raise ValueError(f"unknown quest: {quest!r}")
183
 
184
 
185
def canonical_quest_ids(raw_quest: Any) -> tuple[str, ...]:
    """Resolve a quest label, possibly a ``/``-separated composite, to quest ids.

    The whitespace-normalized label is first resolved as a whole through
    ``canonical_quest_id``; if that succeeds, a one-element tuple is
    returned. Only when the whole label is rejected is it split on ``/`` and
    each non-empty part resolved on its own.

    Args:
        raw_quest: The quest label to resolve. Falsy values are treated as
            an empty string.

    Returns:
        The resolved quest ids in order of first appearance, without
        duplicates.

    Raises:
        ValueError: If the label is unknown and has at most one non-empty
            ``/``-separated part (the error from ``canonical_quest_id`` is
            re-raised unchanged), or if any part of a composite label is
            unknown.
    """
    quest = " ".join(str(raw_quest or "").split())
    try:
        return (canonical_quest_id(quest),)
    except ValueError:
        # Stripping each piece makes whitespace around the slashes irrelevant.
        parts = [part for part in (piece.strip() for piece in quest.split("/")) if part]
        if len(parts) <= 1:
            # Not a composite: surface the original lookup failure as-is.
            raise
        canonical: list[str] = []
        for part in parts:
            try:
                quest_id = canonical_quest_id(part)
            except ValueError as error:
                raise ValueError(f"unknown quest in composite {quest!r}: {part!r}") from error
            # Different spellings may resolve to the same quest; keep the first.
            if quest_id not in canonical:
                canonical.append(quest_id)
        return tuple(canonical)
202
+
203
+
204
  def _clip(text: str, limit: int) -> str:
205
  cleaned = (text or "").strip()
206
  if len(cleaned) <= limit:
tests/test_dashboard.py CHANGED
@@ -196,6 +196,62 @@ def test_quest_analysis_validation_canonicalizes_known_label_suffixes() -> None:
196
  assert validated.matches_by_project[projects[0].id][0]["quest"] == "Off the Grid"
197
 
198
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
  def test_quest_json_extractor_accepts_fenced_object() -> None:
200
  payload = _extract_json_object('```json\n{"projects":[]}\n```')
201
 
 
196
  assert validated.matches_by_project[projects[0].id][0]["quest"] == "Off the Grid"
197
 
198
 
199
def test_quest_analysis_validation_expands_known_composite_quest_labels() -> None:
    """Composite ``A / B`` quest labels yield one validated match per quest, in order."""
    projects = fake_projects(1)
    project_id = projects[0].id
    composite_matches = [
        {
            "quest": "Best MiniCPM Build / Tiny Titan",
            "confidence": 0.84,
            "evidence": "MiniCPM5-1B model",
            "source": "app_file",
        },
        {
            "quest": "Off-Brand / Sharing is Caring",
            "confidence": 0.72,
            "evidence": "custom UI exports a card",
            "source": "readme",
        },
    ]
    raw = {"projects": [{"project_id": project_id, "matches": composite_matches}]}

    validated = validate_quest_analysis_payload(raw, projects, source="fake")

    quests = [match["quest"] for match in validated.matches_by_project[project_id]]
    assert quests == ["OpenBMB", "Tiny Titan", "Off-Brand", "Sharing is Caring"]
227
+
228
+
229
def test_quest_analysis_validation_rejects_unknown_composite_quest_labels() -> None:
    """A composite label containing an unknown quest must fail validation."""
    projects = fake_projects(1)
    bad_match = {
        "quest": "Mystery Award / Tiny Titan",
        "confidence": 0.84,
        "evidence": "tiny model",
        "source": "app_file",
    }
    raw = {"projects": [{"project_id": projects[0].id, "matches": [bad_match]}]}

    caught = None
    try:
        validate_quest_analysis_payload(raw, projects, source="fake")
    except QuestAnalysisError as error:
        caught = error

    if caught is None:
        raise AssertionError("unknown composite quest labels must be rejected")
    assert "unknown quest in composite" in str(caught)
253
+
254
+
255
  def test_quest_json_extractor_accepts_fenced_object() -> None:
256
  payload = _extract_json_object('```json\n{"projects":[]}\n```')
257