Spaces:
Running on Zero
Running on Zero
fix: ignore hosting region tags
Browse files
Co-authored-by: Codex <noreply@openai.com>
- app.py +37 -3
- hackathon_advisor/dashboard.py +164 -17
- hackathon_advisor/data.py +22 -1
- hackathon_advisor/quest_analysis.py +2 -2
- scripts/crawl_hf_spaces.py +3 -2
- static/app.js +5 -1
- tests/test_app.py +18 -0
- tests/test_crawl_hf_spaces.py +2 -1
- tests/test_dashboard.py +70 -2
- tests/test_data.py +17 -0
app.py
CHANGED
|
@@ -10,6 +10,7 @@ import sys
|
|
| 10 |
import tempfile
|
| 11 |
from threading import Lock, Thread
|
| 12 |
import time
|
|
|
|
| 13 |
from typing import Any, Iterator
|
| 14 |
from uuid import uuid4
|
| 15 |
|
|
@@ -29,7 +30,13 @@ from hackathon_advisor.dashboard_storage import (
|
|
| 29 |
persist_refresh_artifacts,
|
| 30 |
require_writable_cache_dir,
|
| 31 |
)
|
| 32 |
-
from hackathon_advisor.data import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
from hackathon_advisor.demo_rehearsal import build_demo_rehearsal
|
| 34 |
from hackathon_advisor.model_runtime import create_tool_planner
|
| 35 |
from hackathon_advisor.profiling import (
|
|
@@ -242,11 +249,13 @@ def _run_refresh_job(run_id: str, cache_dir: Path) -> None:
|
|
| 242 |
},
|
| 243 |
)
|
| 244 |
except Exception as error: # noqa: BLE001 - background job must report every failure as state
|
|
|
|
|
|
|
| 245 |
_set_refresh_state(
|
| 246 |
status="failed",
|
| 247 |
stage="",
|
| 248 |
finished_at=datetime.now(timezone.utc).isoformat(timespec="seconds"),
|
| 249 |
-
error=
|
| 250 |
result=None,
|
| 251 |
)
|
| 252 |
|
|
@@ -414,6 +423,17 @@ def _format_output_tail(output_tail: list[str]) -> str:
|
|
| 414 |
return "\n".join(output_tail) if output_tail else "(no output)"
|
| 415 |
|
| 416 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 417 |
def _replace_runtime_from_files(projects_path: Path, index_path: Path, refreshed_dashboard: dict[str, Any]) -> None:
|
| 418 |
global index, engine, _cpu_engine, dashboard_payload
|
| 419 |
new_index = ProjectIndex.from_files(projects_path, index_path)
|
|
@@ -425,6 +445,20 @@ def _replace_runtime_from_files(projects_path: Path, index_path: Path, refreshed
|
|
| 425 |
dashboard_payload = refreshed_dashboard
|
| 426 |
|
| 427 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 428 |
def _session_from_json(session_json: str = "{}") -> dict[str, Any]:
|
| 429 |
try:
|
| 430 |
session = json.loads(session_json or "{}")
|
|
@@ -521,7 +555,7 @@ def static_file(path: str) -> FileResponse:
|
|
| 521 |
@app.get("/api/dashboard")
|
| 522 |
def dashboard() -> dict:
|
| 523 |
with _runtime_lock:
|
| 524 |
-
payload =
|
| 525 |
payload["refresh"] = _refresh_public_state()
|
| 526 |
return payload
|
| 527 |
|
|
|
|
| 10 |
import tempfile
|
| 11 |
from threading import Lock, Thread
|
| 12 |
import time
|
| 13 |
+
import traceback
|
| 14 |
from typing import Any, Iterator
|
| 15 |
from uuid import uuid4
|
| 16 |
|
|
|
|
| 30 |
persist_refresh_artifacts,
|
| 31 |
require_writable_cache_dir,
|
| 32 |
)
|
| 33 |
+
from hackathon_advisor.data import (
|
| 34 |
+
DEFAULT_EMBEDDING_MODEL_FILE,
|
| 35 |
+
DEFAULT_EMBEDDING_MODEL_REPO,
|
| 36 |
+
Project,
|
| 37 |
+
ProjectIndex,
|
| 38 |
+
normalize_project_tags,
|
| 39 |
+
)
|
| 40 |
from hackathon_advisor.demo_rehearsal import build_demo_rehearsal
|
| 41 |
from hackathon_advisor.model_runtime import create_tool_planner
|
| 42 |
from hackathon_advisor.profiling import (
|
|
|
|
| 249 |
},
|
| 250 |
)
|
| 251 |
except Exception as error: # noqa: BLE001 - background job must report every failure as state
|
| 252 |
+
print("[dashboard-refresh] failed", flush=True)
|
| 253 |
+
traceback.print_exception(type(error), error, error.__traceback__)
|
| 254 |
_set_refresh_state(
|
| 255 |
status="failed",
|
| 256 |
stage="",
|
| 257 |
finished_at=datetime.now(timezone.utc).isoformat(timespec="seconds"),
|
| 258 |
+
error=_format_refresh_error(error),
|
| 259 |
result=None,
|
| 260 |
)
|
| 261 |
|
|
|
|
| 423 |
return "\n".join(output_tail) if output_tail else "(no output)"
|
| 424 |
|
| 425 |
|
| 426 |
+
def _format_refresh_error(error: BaseException) -> str:
|
| 427 |
+
parts = [f"{type(error).__name__}: {error}"]
|
| 428 |
+
cause = error.__cause__
|
| 429 |
+
if cause is not None:
|
| 430 |
+
parts.append(f"caused by {type(cause).__name__}: {cause}")
|
| 431 |
+
context = error.__context__
|
| 432 |
+
if context is not None and context is not cause:
|
| 433 |
+
parts.append(f"context {type(context).__name__}: {context}")
|
| 434 |
+
return "; ".join(parts)
|
| 435 |
+
|
| 436 |
+
|
| 437 |
def _replace_runtime_from_files(projects_path: Path, index_path: Path, refreshed_dashboard: dict[str, Any]) -> None:
|
| 438 |
global index, engine, _cpu_engine, dashboard_payload
|
| 439 |
new_index = ProjectIndex.from_files(projects_path, index_path)
|
|
|
|
| 445 |
dashboard_payload = refreshed_dashboard
|
| 446 |
|
| 447 |
|
| 448 |
+
def _public_dashboard_payload(payload: dict[str, Any]) -> dict[str, Any]:
    """Return a shallow copy of *payload* with every point passed through
    ``_public_dashboard_point``.

    A missing or falsy ``"points"`` entry yields an empty list.  The input
    mapping itself is not modified.
    """
    raw_points = payload.get("points") or []
    return {
        **payload,
        "points": [_public_dashboard_point(entry) for entry in raw_points],
    }
|
| 452 |
+
|
| 453 |
+
|
| 454 |
+
def _public_dashboard_point(point: Any) -> dict[str, Any]:
    """Return a copy of one dashboard point with its tags normalised.

    Anything that is not a ``dict`` is replaced by an empty dict.  For dict
    points the ``"tags"`` value (or an empty list when missing/falsy) is run
    through ``normalize_project_tags`` and stored back as a list.
    """
    if isinstance(point, dict):
        sanitized = {**point}
        sanitized["tags"] = list(normalize_project_tags(sanitized.get("tags") or []))
        return sanitized
    return {}
|
| 460 |
+
|
| 461 |
+
|
| 462 |
def _session_from_json(session_json: str = "{}") -> dict[str, Any]:
|
| 463 |
try:
|
| 464 |
session = json.loads(session_json or "{}")
|
|
|
|
| 555 |
@app.get("/api/dashboard")
def dashboard() -> dict:
    """Serve the public dashboard payload together with the refresh state.

    While holding ``_runtime_lock``, the current ``dashboard_payload`` is
    converted with ``_public_dashboard_payload`` and the result of
    ``_refresh_public_state()`` is stored under the ``"refresh"`` key.
    """
    with _runtime_lock:
        public_view = _public_dashboard_payload(dashboard_payload)
        public_view["refresh"] = _refresh_public_state()
    return public_view
|
| 561 |
|
hackathon_advisor/dashboard.py
CHANGED
|
@@ -6,7 +6,14 @@ from datetime import datetime, timezone
|
|
| 6 |
import math
|
| 7 |
from typing import Any
|
| 8 |
|
| 9 |
-
from hackathon_advisor.data import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
from hackathon_advisor.quest_taxonomy import QUESTS, normalize_match, quest_profiles
|
| 11 |
|
| 12 |
|
|
@@ -14,12 +21,30 @@ DASHBOARD_SCHEMA_VERSION = 1
|
|
| 14 |
TSNE_RANDOM_STATE = 42
|
| 15 |
TSNE_MIN_PROJECTS = 3
|
| 16 |
LINKS_PER_PROJECT = 2
|
|
|
|
| 17 |
|
| 18 |
STOPWORDS = {
|
|
|
|
| 19 |
"agent",
|
| 20 |
"app",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
"assistant",
|
|
|
|
|
|
|
|
|
|
| 22 |
"build",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
"demo",
|
| 24 |
"face",
|
| 25 |
"for",
|
|
@@ -27,13 +52,55 @@ STOPWORDS = {
|
|
| 27 |
"gradio",
|
| 28 |
"hackathon",
|
| 29 |
"hugging",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
"local",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
"model",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
"project",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
"small",
|
| 34 |
"space",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
"this",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
"with",
|
|
|
|
|
|
|
| 37 |
}
|
| 38 |
|
| 39 |
|
|
@@ -79,6 +146,7 @@ def build_dashboard_payload(
|
|
| 79 |
"random_state": TSNE_RANDOM_STATE,
|
| 80 |
"perplexity": _tsne_perplexity(len(projects)),
|
| 81 |
},
|
|
|
|
| 82 |
"points": points,
|
| 83 |
"links": links,
|
| 84 |
"clusters": clusters,
|
|
@@ -205,15 +273,25 @@ def _cluster_payloads(
|
|
| 205 |
)
|
| 206 |
cluster_id_by_raw = {label: f"cluster-{position + 1}" for position, label in enumerate(ordered_raw_labels)}
|
| 207 |
clusters: list[dict[str, Any]] = []
|
|
|
|
| 208 |
for raw_label in ordered_raw_labels:
|
| 209 |
indexes = grouped[raw_label]
|
| 210 |
-
|
| 211 |
-
label = " / ".join(word.title() for word in keywords[:2]) if keywords else "Project cluster"
|
| 212 |
representatives = sorted(
|
| 213 |
-
|
| 214 |
key=lambda project: (project.likes, project.last_modified, project.title.lower()),
|
| 215 |
reverse=True,
|
| 216 |
)[:4]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
clusters.append(
|
| 218 |
{
|
| 219 |
"id": cluster_id_by_raw[raw_label],
|
|
@@ -237,20 +315,89 @@ def _cluster_center(coordinates: Sequence[tuple[float, float]], indexes: Sequenc
|
|
| 237 |
)
|
| 238 |
|
| 239 |
|
| 240 |
-
def
|
| 241 |
-
|
| 242 |
for project in projects:
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
)
|
| 252 |
-
|
| 253 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 254 |
|
| 255 |
|
| 256 |
def _normalize_quest_matches(
|
|
@@ -302,7 +449,7 @@ def _point_payloads(
|
|
| 302 |
"likes": project.likes,
|
| 303 |
"sdk": project.sdk,
|
| 304 |
"models": list(project.models),
|
| 305 |
-
"tags": list(project.tags),
|
| 306 |
"last_modified": project.last_modified,
|
| 307 |
"x": x,
|
| 308 |
"y": y,
|
|
|
|
| 6 |
import math
|
| 7 |
from typing import Any
|
| 8 |
|
| 9 |
+
from hackathon_advisor.data import (
|
| 10 |
+
Project,
|
| 11 |
+
ProjectIndex,
|
| 12 |
+
normalize_project_tags,
|
| 13 |
+
public_project_summary,
|
| 14 |
+
public_project_title,
|
| 15 |
+
tokenize,
|
| 16 |
+
)
|
| 17 |
from hackathon_advisor.quest_taxonomy import QUESTS, normalize_match, quest_profiles
|
| 18 |
|
| 19 |
|
|
|
|
| 21 |
TSNE_RANDOM_STATE = 42
|
| 22 |
TSNE_MIN_PROJECTS = 3
|
| 23 |
LINKS_PER_PROJECT = 2
|
| 24 |
+
CLUSTER_LABEL_ALGORITHM = "distinctive-keywords-v1"
|
| 25 |
|
| 26 |
STOPWORDS = {
|
| 27 |
+
"about",
|
| 28 |
"agent",
|
| 29 |
"app",
|
| 30 |
+
"apps",
|
| 31 |
+
"ai",
|
| 32 |
+
"all",
|
| 33 |
+
"an",
|
| 34 |
+
"and",
|
| 35 |
+
"are",
|
| 36 |
+
"as",
|
| 37 |
+
"at",
|
| 38 |
+
"before",
|
| 39 |
"assistant",
|
| 40 |
+
"be",
|
| 41 |
+
"been",
|
| 42 |
+
"being",
|
| 43 |
"build",
|
| 44 |
+
"build-small",
|
| 45 |
+
"build-small-hackathon",
|
| 46 |
+
"built",
|
| 47 |
+
"by",
|
| 48 |
"demo",
|
| 49 |
"face",
|
| 50 |
"for",
|
|
|
|
| 52 |
"gradio",
|
| 53 |
"hackathon",
|
| 54 |
"hugging",
|
| 55 |
+
"huggingface",
|
| 56 |
+
"in",
|
| 57 |
+
"is",
|
| 58 |
+
"it",
|
| 59 |
+
"its",
|
| 60 |
+
"first",
|
| 61 |
"local",
|
| 62 |
+
"make",
|
| 63 |
+
"makes",
|
| 64 |
+
"made",
|
| 65 |
+
"me",
|
| 66 |
"model",
|
| 67 |
+
"models",
|
| 68 |
+
"my",
|
| 69 |
+
"of",
|
| 70 |
+
"on",
|
| 71 |
+
"or",
|
| 72 |
+
"our",
|
| 73 |
+
"one",
|
| 74 |
"project",
|
| 75 |
+
"projects",
|
| 76 |
+
"pro",
|
| 77 |
+
"region",
|
| 78 |
+
"run",
|
| 79 |
+
"runs",
|
| 80 |
"small",
|
| 81 |
"space",
|
| 82 |
+
"spaces",
|
| 83 |
+
"submission",
|
| 84 |
+
"the",
|
| 85 |
+
"their",
|
| 86 |
+
"them",
|
| 87 |
+
"these",
|
| 88 |
+
"they",
|
| 89 |
"this",
|
| 90 |
+
"those",
|
| 91 |
+
"to",
|
| 92 |
+
"tool",
|
| 93 |
+
"tools",
|
| 94 |
+
"try",
|
| 95 |
+
"us",
|
| 96 |
+
"use",
|
| 97 |
+
"used",
|
| 98 |
+
"uses",
|
| 99 |
+
"using",
|
| 100 |
+
"we",
|
| 101 |
"with",
|
| 102 |
+
"you",
|
| 103 |
+
"your",
|
| 104 |
}
|
| 105 |
|
| 106 |
|
|
|
|
| 146 |
"random_state": TSNE_RANDOM_STATE,
|
| 147 |
"perplexity": _tsne_perplexity(len(projects)),
|
| 148 |
},
|
| 149 |
+
"cluster_label_algorithm": CLUSTER_LABEL_ALGORITHM,
|
| 150 |
"points": points,
|
| 151 |
"links": links,
|
| 152 |
"clusters": clusters,
|
|
|
|
| 273 |
)
|
| 274 |
cluster_id_by_raw = {label: f"cluster-{position + 1}" for position, label in enumerate(ordered_raw_labels)}
|
| 275 |
clusters: list[dict[str, Any]] = []
|
| 276 |
+
corpus_document_frequency = _corpus_document_frequency(projects)
|
| 277 |
for raw_label in ordered_raw_labels:
|
| 278 |
indexes = grouped[raw_label]
|
| 279 |
+
cluster_projects = [projects[index] for index in indexes]
|
|
|
|
| 280 |
representatives = sorted(
|
| 281 |
+
cluster_projects,
|
| 282 |
key=lambda project: (project.likes, project.last_modified, project.title.lower()),
|
| 283 |
reverse=True,
|
| 284 |
)[:4]
|
| 285 |
+
keywords = _cluster_keywords(
|
| 286 |
+
cluster_projects,
|
| 287 |
+
corpus_document_frequency=corpus_document_frequency,
|
| 288 |
+
corpus_project_count=len(projects),
|
| 289 |
+
)
|
| 290 |
+
label = (
|
| 291 |
+
" / ".join(word.title() for word in keywords[:2])
|
| 292 |
+
if keywords
|
| 293 |
+
else _representative_cluster_label(representatives)
|
| 294 |
+
)
|
| 295 |
clusters.append(
|
| 296 |
{
|
| 297 |
"id": cluster_id_by_raw[raw_label],
|
|
|
|
| 315 |
)
|
| 316 |
|
| 317 |
|
| 318 |
+
def _corpus_document_frequency(projects: Sequence[Project]) -> Counter[str]:
    """Count, per keyword token, how many projects contain it at least once.

    Each project's tokens are de-duplicated with ``set`` before counting, so a
    token repeated inside one project contributes exactly one.
    """
    return Counter(
        token
        for project in projects
        for token in set(_project_keyword_tokens(project))
    )
|
| 323 |
+
|
| 324 |
+
|
| 325 |
+
def _cluster_keywords(
    projects: Sequence[Project],
    *,
    corpus_document_frequency: Mapping[str, int],
    corpus_project_count: int,
) -> list[str]:
    """Pick up to five tokens that best distinguish a cluster from the corpus.

    Args:
        projects: The projects belonging to the cluster.
        corpus_document_frequency: For each token, the number of projects in
            the whole corpus that contain it.
        corpus_project_count: Total number of projects in the corpus.

    Returns:
        Tokens ordered by descending score, then descending number of cluster
        projects containing the token, then descending raw count, then
        alphabetically.  Empty when the cluster has no projects.
    """
    members = list(projects)
    if not members:
        return []

    term_counts: Counter[str] = Counter()
    cluster_frequency: Counter[str] = Counter()
    for member in members:
        member_tokens = _project_keyword_tokens(member)
        term_counts.update(member_tokens)
        cluster_frequency.update(set(member_tokens))

    # Clusters of more than three projects need a token in at least two of them.
    required_documents = 2 if len(members) > 3 else 1

    ranked: list[tuple[float, int, int, str]] = []
    for token, occurrences in term_counts.items():
        in_cluster = cluster_frequency[token]
        if in_cluster < required_documents:
            continue
        in_corpus = int(corpus_document_frequency.get(token) or 0)
        if in_corpus <= 0:
            continue
        idf = math.log((1 + corpus_project_count) / (1 + in_corpus))
        # Non-positive IDF: the token is in every corpus project (or more).
        if idf <= 0.0:
            continue
        exclusivity = in_cluster / in_corpus
        coverage = in_cluster / len(members)
        weight = (
            (1.0 + math.log(occurrences))
            * idf
            * (0.35 + 0.65 * exclusivity)
            * (0.35 + 0.65 * coverage)
        )
        ranked.append((weight, in_cluster, occurrences, token))

    ranked.sort(key=lambda entry: (-entry[0], -entry[1], -entry[2], entry[3]))
    return [entry[3] for entry in ranked[:5]]
|
| 366 |
+
|
| 367 |
+
|
| 368 |
+
def _project_keyword_tokens(project: Project) -> list[str]:
    """Tokenize a project's descriptive fields and keep cluster-keyword tokens.

    The text is built from the title, the slug (with ``-`` and ``_`` turned
    into spaces), the summary, the normalised tags and the model names, then
    passed to ``tokenize`` and filtered with ``_is_cluster_keyword``.
    """
    fields = (
        project.title,
        project.slug.replace("-", " ").replace("_", " "),
        project.summary,
        " ".join(normalize_project_tags(project.tags)),
        " ".join(project.models),
    )
    combined = " ".join(fields)
    return list(filter(_is_cluster_keyword, tokenize(combined)))
|
| 379 |
+
|
| 380 |
+
|
| 381 |
+
def _is_cluster_keyword(token: str) -> bool:
    """Return ``True`` unless *token* is a stopword, starts with ``"region"``,
    or consists only of digits."""
    rejected = (
        token in STOPWORDS
        or token.startswith("region")
        or token.isdigit()
    )
    return not rejected
|
| 389 |
+
|
| 390 |
+
|
| 391 |
+
def _representative_cluster_label(projects: Sequence[Project]) -> str:
    """Build a fallback cluster label from the first two usable project titles.

    Titles are passed through ``public_project_title``; results equal to
    ``"Untitled project"`` are skipped.  Returns the collected titles joined
    with ``" / "``, or ``"Mixed projects"`` when none qualify.
    """
    picked: list[str] = []
    for project in projects:
        candidate = public_project_title(project.title)
        if candidate != "Untitled project":
            picked.append(candidate)
            if len(picked) == 2:
                break
    if not picked:
        return "Mixed projects"
    return " / ".join(picked)
|
| 401 |
|
| 402 |
|
| 403 |
def _normalize_quest_matches(
|
|
|
|
| 449 |
"likes": project.likes,
|
| 450 |
"sdk": project.sdk,
|
| 451 |
"models": list(project.models),
|
| 452 |
+
"tags": list(normalize_project_tags(project.tags)),
|
| 453 |
"last_modified": project.last_modified,
|
| 454 |
"x": x,
|
| 455 |
"y": y,
|
hackathon_advisor/data.py
CHANGED
|
@@ -32,6 +32,7 @@ DEFAULT_EMBEDDING_MODEL_REPO = "ggml-org/embeddinggemma-300m-qat-q8_0-GGUF"
|
|
| 32 |
DEFAULT_EMBEDDING_MODEL_FILE = "embeddinggemma-300m-qat-Q8_0.gguf"
|
| 33 |
DEFAULT_EMBEDDING_RUNTIME = "llama.cpp via llama-cpp-python"
|
| 34 |
APP_FILE_EMBEDDING_CHAR_LIMIT = 2000
|
|
|
|
| 35 |
|
| 36 |
|
| 37 |
EmbeddingFunction = Callable[[str], Sequence[float]]
|
|
@@ -108,7 +109,7 @@ class Project:
|
|
| 108 |
"id": self.id,
|
| 109 |
"title": public_project_title(self.title),
|
| 110 |
"summary": public_project_summary(self.summary),
|
| 111 |
-
"tags": list(self.tags),
|
| 112 |
"models": list(self.models),
|
| 113 |
"datasets": list(self.datasets),
|
| 114 |
"likes": self.likes,
|
|
@@ -150,6 +151,7 @@ class Project:
|
|
| 150 |
)
|
| 151 |
return payload
|
| 152 |
|
|
|
|
| 153 |
@dataclass(frozen=True)
|
| 154 |
class SearchHit:
|
| 155 |
project: Project
|
|
@@ -185,6 +187,25 @@ def public_project_title(title: str) -> str:
|
|
| 185 |
return cleaned
|
| 186 |
|
| 187 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
def public_project_summary(summary: str) -> str:
|
| 189 |
cleaned = " ".join(str(summary).split())
|
| 190 |
if not cleaned:
|
|
|
|
| 32 |
DEFAULT_EMBEDDING_MODEL_FILE = "embeddinggemma-300m-qat-Q8_0.gguf"
|
| 33 |
DEFAULT_EMBEDDING_RUNTIME = "llama.cpp via llama-cpp-python"
|
| 34 |
APP_FILE_EMBEDDING_CHAR_LIMIT = 2000
|
| 35 |
+
HOSTING_METADATA_TAG_PREFIXES = ("region:",)
|
| 36 |
|
| 37 |
|
| 38 |
EmbeddingFunction = Callable[[str], Sequence[float]]
|
|
|
|
| 109 |
"id": self.id,
|
| 110 |
"title": public_project_title(self.title),
|
| 111 |
"summary": public_project_summary(self.summary),
|
| 112 |
+
"tags": list(normalize_project_tags(self.tags)),
|
| 113 |
"models": list(self.models),
|
| 114 |
"datasets": list(self.datasets),
|
| 115 |
"likes": self.likes,
|
|
|
|
| 151 |
)
|
| 152 |
return payload
|
| 153 |
|
| 154 |
+
|
| 155 |
@dataclass(frozen=True)
|
| 156 |
class SearchHit:
|
| 157 |
project: Project
|
|
|
|
| 187 |
return cleaned
|
| 188 |
|
| 189 |
|
| 190 |
+
def normalize_project_tags(tags: Sequence[Any]) -> tuple[str, ...]:
    """Return cleaned, de-duplicated tags with hosting metadata removed.

    Each tag is converted to ``str`` (falsy values become ``""``) and has its
    whitespace runs collapsed to single spaces.  Empty results and tags for
    which ``is_hosting_metadata_tag`` is true are dropped.  The first
    occurrence of each remaining tag is kept, in input order.
    """
    # A dict doubles as an insertion-ordered set.
    unique: dict[str, None] = {}
    for raw_tag in tags or ():
        collapsed = " ".join(str(raw_tag or "").split())
        if collapsed and not is_hosting_metadata_tag(collapsed):
            unique.setdefault(collapsed, None)
    return tuple(unique)
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
def is_hosting_metadata_tag(tag: str) -> bool:
    """Tell whether *tag* begins with one of ``HOSTING_METADATA_TAG_PREFIXES``.

    The comparison is made on the stripped, case-folded string form of *tag*;
    a falsy *tag* is treated as the empty string.
    """
    candidate = str(tag or "").strip().casefold()
    for prefix in HOSTING_METADATA_TAG_PREFIXES:
        if candidate.startswith(prefix):
            return True
    return False
|
| 207 |
+
|
| 208 |
+
|
| 209 |
def public_project_summary(summary: str) -> str:
|
| 210 |
cleaned = " ".join(str(summary).split())
|
| 211 |
if not cleaned:
|
hackathon_advisor/quest_analysis.py
CHANGED
|
@@ -7,7 +7,7 @@ import json
|
|
| 7 |
import os
|
| 8 |
from typing import Any, Protocol
|
| 9 |
|
| 10 |
-
from hackathon_advisor.data import Project
|
| 11 |
from hackathon_advisor.model_runtime import (
|
| 12 |
DEFAULT_MODEL_ID,
|
| 13 |
_minicpm_generation_kwargs,
|
|
@@ -317,7 +317,7 @@ def render_project_quest_prompt(project: Project) -> str:
|
|
| 317 |
title=project.title,
|
| 318 |
sdk=project.sdk,
|
| 319 |
declared_models=project.models,
|
| 320 |
-
tags=project.tags,
|
| 321 |
readme_segment=build_readme_segment(project.readme_body),
|
| 322 |
app_file_name=project.app_file,
|
| 323 |
app_file_segment=build_app_segment(project.app_file_source, project.app_file_embedding_text),
|
|
|
|
| 7 |
import os
|
| 8 |
from typing import Any, Protocol
|
| 9 |
|
| 10 |
+
from hackathon_advisor.data import Project, normalize_project_tags
|
| 11 |
from hackathon_advisor.model_runtime import (
|
| 12 |
DEFAULT_MODEL_ID,
|
| 13 |
_minicpm_generation_kwargs,
|
|
|
|
| 317 |
title=project.title,
|
| 318 |
sdk=project.sdk,
|
| 319 |
declared_models=project.models,
|
| 320 |
+
tags=normalize_project_tags(project.tags),
|
| 321 |
readme_segment=build_readme_segment(project.readme_body),
|
| 322 |
app_file_name=project.app_file,
|
| 323 |
app_file_segment=build_app_segment(project.app_file_source, project.app_file_embedding_text),
|
scripts/crawl_hf_spaces.py
CHANGED
|
@@ -19,7 +19,7 @@ from huggingface_hub.errors import EntryNotFoundError
|
|
| 19 |
ROOT = Path(__file__).resolve().parents[1]
|
| 20 |
sys.path.insert(0, str(ROOT))
|
| 21 |
|
| 22 |
-
from hackathon_advisor.data import extract_app_file_embedding_text
|
| 23 |
|
| 24 |
|
| 25 |
API = "https://huggingface.co/api"
|
|
@@ -88,11 +88,12 @@ def project_from_space(space: Any) -> dict[str, Any]:
|
|
| 88 |
|
| 89 |
title = str(card.get("title") or humanize_slug(space_id.rsplit("/", 1)[-1]))
|
| 90 |
summary = str(card.get("short_description") or card.get("description") or "")
|
|
|
|
| 91 |
return {
|
| 92 |
"id": space_id,
|
| 93 |
"title": title,
|
| 94 |
"summary": summary,
|
| 95 |
-
"tags":
|
| 96 |
"models": [str(model) for model in getattr(space, "models", None) or card.get("models") or []],
|
| 97 |
"datasets": [
|
| 98 |
str(dataset) for dataset in getattr(space, "datasets", None) or card.get("datasets") or []
|
|
|
|
| 19 |
ROOT = Path(__file__).resolve().parents[1]
|
| 20 |
sys.path.insert(0, str(ROOT))
|
| 21 |
|
| 22 |
+
from hackathon_advisor.data import extract_app_file_embedding_text, normalize_project_tags
|
| 23 |
|
| 24 |
|
| 25 |
API = "https://huggingface.co/api"
|
|
|
|
| 88 |
|
| 89 |
title = str(card.get("title") or humanize_slug(space_id.rsplit("/", 1)[-1]))
|
| 90 |
summary = str(card.get("short_description") or card.get("description") or "")
|
| 91 |
+
raw_tags = sorted(set(str(tag) for tag in (card.get("tags") or getattr(space, "tags", None) or [])))
|
| 92 |
return {
|
| 93 |
"id": space_id,
|
| 94 |
"title": title,
|
| 95 |
"summary": summary,
|
| 96 |
+
"tags": list(normalize_project_tags(raw_tags)),
|
| 97 |
"models": [str(model) for model in getattr(space, "models", None) or card.get("models") or []],
|
| 98 |
"datasets": [
|
| 99 |
str(dataset) for dataset in getattr(space, "datasets", None) or card.get("datasets") or []
|
static/app.js
CHANGED
|
@@ -498,7 +498,7 @@ function renderAtlasDetail(point) {
|
|
| 498 |
return `<span>${escapeHtml(atlasQuestLabel(match.quest))} ${confidence}%</span>`;
|
| 499 |
})
|
| 500 |
.join("");
|
| 501 |
-
const tags = [...(point.models || []).slice(0, 3), ...(point.tags || []).slice(0, 3)]
|
| 502 |
.map((tag) => `<span>${escapeHtml(tag)}</span>`)
|
| 503 |
.join("");
|
| 504 |
atlasDetailEl.innerHTML = `
|
|
@@ -511,6 +511,10 @@ function renderAtlasDetail(point) {
|
|
| 511 |
`;
|
| 512 |
}
|
| 513 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 514 |
function renderAtlasReport(data) {
|
| 515 |
if (!atlasReportEl) return;
|
| 516 |
const cluster = selectedClusterId
|
|
|
|
| 498 |
return `<span>${escapeHtml(atlasQuestLabel(match.quest))} ${confidence}%</span>`;
|
| 499 |
})
|
| 500 |
.join("");
|
| 501 |
+
const tags = [...(point.models || []).slice(0, 3), ...visibleProjectTags(point.tags || []).slice(0, 3)]
|
| 502 |
.map((tag) => `<span>${escapeHtml(tag)}</span>`)
|
| 503 |
.join("");
|
| 504 |
atlasDetailEl.innerHTML = `
|
|
|
|
| 511 |
`;
|
| 512 |
}
|
| 513 |
|
| 514 |
+
/**
 * Drop hosting-metadata tags (those starting with "region:") from a tag list.
 *
 * The prefix test ignores case and surrounding whitespace so a padded tag
 * such as " Region:US" is also hidden.
 *
 * @param {Array<*>} tags Tags to filter; any non-array value yields [].
 * @returns {Array<*>} The tags that remain, in their original order.
 */
function visibleProjectTags(tags) {
  // Guard first: a string or object here would otherwise throw on `.filter`.
  if (!Array.isArray(tags)) return [];
  return tags.filter((tag) => !String(tag || "").trim().toLowerCase().startsWith("region:"));
}
|
| 517 |
+
|
| 518 |
function renderAtlasReport(data) {
|
| 519 |
if (!atlasReportEl) return;
|
| 520 |
const cluster = selectedClusterId
|
tests/test_app.py
CHANGED
|
@@ -133,6 +133,24 @@ def test_dashboard_endpoint_exposes_atlas_payload() -> None:
|
|
| 133 |
assert payload["links"]
|
| 134 |
assert payload["quest_report"]["status"] in {"analyzed", "not_analyzed"}
|
| 135 |
assert payload["refresh"]["status"] in {"idle", "running", "succeeded", "failed"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
|
| 137 |
|
| 138 |
def test_dashboard_refresh_requires_bucket(monkeypatch) -> None:
|
|
|
|
| 133 |
assert payload["links"]
|
| 134 |
assert payload["quest_report"]["status"] in {"analyzed", "not_analyzed"}
|
| 135 |
assert payload["refresh"]["status"] in {"idle", "running", "succeeded", "failed"}
|
| 136 |
+
assert all(
|
| 137 |
+
not str(tag).casefold().startswith("region:")
|
| 138 |
+
for point in payload["points"]
|
| 139 |
+
for tag in point.get("tags", [])
|
| 140 |
+
)
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
def test_refresh_error_format_includes_exception_chain() -> None:
    """The formatted refresh error names both the error and its explicit cause."""

    def _raise_chained() -> None:
        try:
            raise ValueError("bad quest")
        except ValueError as cause:
            raise RuntimeError("refresh failed") from cause

    try:
        _raise_chained()
    except RuntimeError as error:
        message = app_module._format_refresh_error(error)

    for expected in ("RuntimeError: refresh failed", "caused by ValueError: bad quest"):
        assert expected in message
|
| 154 |
|
| 155 |
|
| 156 |
def test_dashboard_refresh_requires_bucket(monkeypatch) -> None:
|
tests/test_crawl_hf_spaces.py
CHANGED
|
@@ -44,7 +44,7 @@ def test_project_from_space_downloads_frontmatter_app_file(monkeypatch) -> None:
|
|
| 44 |
SimpleNamespace(rfilename="README.md"),
|
| 45 |
SimpleNamespace(rfilename="app.py"),
|
| 46 |
],
|
| 47 |
-
tags=["gradio"],
|
| 48 |
models=[],
|
| 49 |
datasets=[],
|
| 50 |
likes=3,
|
|
@@ -61,6 +61,7 @@ def test_project_from_space_downloads_frontmatter_app_file(monkeypatch) -> None:
|
|
| 61 |
assert project["app_file_source"] == "import gradio as gr\ngr.Textbox(label='Idea')\n"
|
| 62 |
assert "gr.Textbox" in project["app_file_embedding_text"]
|
| 63 |
assert "Idea" in project["app_file_embedding_text"]
|
|
|
|
| 64 |
|
| 65 |
|
| 66 |
def test_project_from_space_tolerates_stale_frontmatter_app_file(monkeypatch) -> None:
|
|
|
|
| 44 |
SimpleNamespace(rfilename="README.md"),
|
| 45 |
SimpleNamespace(rfilename="app.py"),
|
| 46 |
],
|
| 47 |
+
tags=["gradio", "region:us"],
|
| 48 |
models=[],
|
| 49 |
datasets=[],
|
| 50 |
likes=3,
|
|
|
|
| 61 |
assert project["app_file_source"] == "import gradio as gr\ngr.Textbox(label='Idea')\n"
|
| 62 |
assert "gr.Textbox" in project["app_file_embedding_text"]
|
| 63 |
assert "Idea" in project["app_file_embedding_text"]
|
| 64 |
+
assert project["tags"] == ["gradio"]
|
| 65 |
|
| 66 |
|
| 67 |
def test_project_from_space_tolerates_stale_frontmatter_app_file(monkeypatch) -> None:
|
tests/test_dashboard.py
CHANGED
|
@@ -2,7 +2,11 @@ from __future__ import annotations
|
|
| 2 |
|
| 3 |
from pathlib import Path
|
| 4 |
|
| 5 |
-
from hackathon_advisor.dashboard import
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
from hackathon_advisor.data import Project, ProjectIndex, build_index_payload
|
| 7 |
from hackathon_advisor.quest_analysis import (
|
| 8 |
MiniCPMQuestAnalyzer,
|
|
@@ -45,6 +49,7 @@ def test_dashboard_builder_projects_embeddings_with_tsne_and_clusters() -> None:
|
|
| 45 |
assert payload["quest_report"]["status"] == "analyzed"
|
| 46 |
assert all(0 <= point["x"] <= 100 and 0 <= point["y"] <= 100 for point in payload["points"])
|
| 47 |
assert all(point["quest_ids"] for point in payload["points"])
|
|
|
|
| 48 |
|
| 49 |
|
| 50 |
def test_dashboard_builder_is_deterministic_for_fixed_vectors() -> None:
|
|
@@ -59,6 +64,18 @@ def test_dashboard_builder_is_deterministic_for_fixed_vectors() -> None:
|
|
| 59 |
assert left["clusters"] == right["clusters"]
|
| 60 |
|
| 61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
def test_quest_analysis_validation_accepts_strict_project_coverage() -> None:
|
| 63 |
projects = fake_projects(4)
|
| 64 |
raw = {
|
|
@@ -343,7 +360,7 @@ def test_quest_prompt_uses_raw_readme_and_app_source_segments() -> None:
|
|
| 343 |
id="build-small-hackathon/two-segment",
|
| 344 |
title="Two Segment",
|
| 345 |
summary="card summary should not drive quest analysis",
|
| 346 |
-
tags=("gradio",),
|
| 347 |
models=("openbmb/MiniCPM5-1B",),
|
| 348 |
datasets=(),
|
| 349 |
likes=1,
|
|
@@ -367,6 +384,7 @@ def test_quest_prompt_uses_raw_readme_and_app_source_segments() -> None:
|
|
| 367 |
assert "from llama_cpp import Llama" in prompt
|
| 368 |
assert "card summary should not drive quest analysis" not in prompt
|
| 369 |
assert "compact app signals should not drive quest analysis" not in prompt
|
|
|
|
| 370 |
|
| 371 |
|
| 372 |
def test_quest_analyzer_rejects_non_minicpm_backend(monkeypatch) -> None:
|
|
@@ -400,6 +418,56 @@ def fake_index() -> ProjectIndex:
|
|
| 400 |
)
|
| 401 |
|
| 402 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 403 |
def fake_projects(count: int) -> list[Project]:
|
| 404 |
return [
|
| 405 |
Project(
|
|
|
|
| 2 |
|
| 3 |
from pathlib import Path
|
| 4 |
|
| 5 |
+
from hackathon_advisor.dashboard import (
|
| 6 |
+
CLUSTER_LABEL_ALGORITHM,
|
| 7 |
+
build_dashboard_payload,
|
| 8 |
+
validate_dashboard_payload,
|
| 9 |
+
)
|
| 10 |
from hackathon_advisor.data import Project, ProjectIndex, build_index_payload
|
| 11 |
from hackathon_advisor.quest_analysis import (
|
| 12 |
MiniCPMQuestAnalyzer,
|
|
|
|
| 49 |
assert payload["quest_report"]["status"] == "analyzed"
|
| 50 |
assert all(0 <= point["x"] <= 100 and 0 <= point["y"] <= 100 for point in payload["points"])
|
| 51 |
assert all(point["quest_ids"] for point in payload["points"])
|
| 52 |
+
assert payload["cluster_label_algorithm"] == CLUSTER_LABEL_ALGORITHM
|
| 53 |
|
| 54 |
|
| 55 |
def test_dashboard_builder_is_deterministic_for_fixed_vectors() -> None:
|
|
|
|
| 64 |
assert left["clusters"] == right["clusters"]
|
| 65 |
|
| 66 |
|
| 67 |
+
def test_dashboard_cluster_labels_ignore_hackathon_wide_noise() -> None:
    """Cluster keywords skip corpus-wide noise and points carry no region tag."""
    payload = build_dashboard_payload(
        noisy_cluster_label_index(),
        generated_at="2026-06-08T00:00:00+00:00",
    )

    seen_keywords: set[str] = set()
    for cluster in payload["clusters"]:
        seen_keywords.update(cluster["keywords"])

    noise = {"ai", "build-small-hackathon", "gradio", "hackathon", "project", "region", "us"}
    themed = {"dream", "family", "garden", "notice", "order", "repair"}
    assert not (seen_keywords & noise)
    assert seen_keywords & themed
    for point in payload["points"]:
        assert "region:us" not in point["tags"]
|
| 77 |
+
|
| 78 |
+
|
| 79 |
def test_quest_analysis_validation_accepts_strict_project_coverage() -> None:
|
| 80 |
projects = fake_projects(4)
|
| 81 |
raw = {
|
|
|
|
| 360 |
id="build-small-hackathon/two-segment",
|
| 361 |
title="Two Segment",
|
| 362 |
summary="card summary should not drive quest analysis",
|
| 363 |
+
tags=("gradio", "region:us"),
|
| 364 |
models=("openbmb/MiniCPM5-1B",),
|
| 365 |
datasets=(),
|
| 366 |
likes=1,
|
|
|
|
| 384 |
assert "from llama_cpp import Llama" in prompt
|
| 385 |
assert "card summary should not drive quest analysis" not in prompt
|
| 386 |
assert "compact app signals should not drive quest analysis" not in prompt
|
| 387 |
+
assert "region:us" not in prompt
|
| 388 |
|
| 389 |
|
| 390 |
def test_quest_analyzer_rejects_non_minicpm_backend(monkeypatch) -> None:
|
|
|
|
| 418 |
)
|
| 419 |
|
| 420 |
|
| 421 |
+
def noisy_cluster_label_index() -> ProjectIndex:
    """Build a ``ProjectIndex`` of six two-project themes that all share the
    same noisy tags and summary boilerplate.

    Every project carries the tags ``build-small-hackathon``, ``ai``,
    ``gradio`` and ``region:us`` plus its own theme tag, and gets a one-hot
    embedding whose hot position is the theme's index.
    """
    themes = [
        ("dream", ("Dream Lantern", "Dream Atlas"), "dream journal symbolic oracle"),
        ("family", ("Family Ledger", "Care Kinship"), "family care bill coordination"),
        ("garden", ("Garden Notebook", "Seed Exchange"), "garden seed neighborhood plants"),
        ("notice", ("Notice Helper", "Scam Screen"), "notice scam safety verification"),
        ("order", ("Order Desk", "Inventory Voice"), "order inventory audio assistant"),
        ("repair", ("Repair Coach", "Tool Shed"), "repair maintenance workshop"),
    ]
    projects: list[Project] = []
    embeddings = []
    for theme_index, (theme, titles, summary) in enumerate(themes):
        for title in titles:
            slug = title.lower().replace(" ", "-")
            project = Project(
                id=f"build-small-hackathon/{slug}",
                title=title,
                summary=(
                    f"{summary} for a build-small-hackathon AI project in the US region "
                    "with a Gradio demo."
                ),
                tags=("build-small-hackathon", "ai", "gradio", "region:us", theme),
                models=("tiny-model",),
                datasets=(),
                likes=theme_index,
                sdk="gradio",
                license="mit",
                created_at="2026-06-01T00:00:00+00:00",
                last_modified=f"2026-06-{theme_index + 1:02d}T00:00:00+00:00",
                host=f"https://{slug}.hf.space",
                url=f"https://huggingface.co/spaces/build-small-hackathon/{slug}",
                app_file="app.py",
                app_file_embedding_text="shared local small model app",
            )
            projects.append(project)
            # NOTE(review): one embedding per project is assumed (append kept
            # inside the title loop) -- confirm against the original nesting.
            embeddings.append(
                [1.0 if position == theme_index else 0.0 for position in range(len(themes))]
            )

    snapshot_generated_at = "2026-06-08T00:00:00+00:00"
    source = "https://example.test/spaces"
    return ProjectIndex(
        projects=projects,
        generated_at=snapshot_generated_at,
        source=source,
        index_payload=build_index_payload(projects, snapshot_generated_at, source, embeddings),
    )
|
| 469 |
+
|
| 470 |
+
|
| 471 |
def fake_projects(count: int) -> list[Project]:
|
| 472 |
return [
|
| 473 |
Project(
|
tests/test_data.py
CHANGED
|
@@ -85,6 +85,23 @@ def test_searchable_text_includes_main_app_file_signals() -> None:
|
|
| 85 |
assert "Project idea" in searchable
|
| 86 |
|
| 87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
def test_searchable_text_excludes_refresh_readme_body_for_stable_reuse() -> None:
|
| 89 |
project = Project(
|
| 90 |
id="build-small-hackathon/long-readme",
|
|
|
|
| 85 |
assert "Project idea" in searchable
|
| 86 |
|
| 87 |
|
| 88 |
+
def test_public_project_tags_exclude_hosting_metadata() -> None:
    """Stored tags stay raw; the public dict drops region tags and duplicates."""
    record = {
        "id": "build-small-hackathon/idea-canvas",
        "title": "Idea Canvas",
        "summary": "",
        "tags": ["gradio", "region:us", "local-first", "region:eu", "gradio"],
        "models": [],
        "datasets": [],
        "url": "https://example.test",
    }
    project = Project.from_dict(record)

    stored_tags = project.tags
    public_tags = project.to_public_dict()["tags"]
    assert stored_tags == ("gradio", "region:us", "local-first", "region:eu", "gradio")
    assert public_tags == ["gradio", "local-first"]
|
| 103 |
+
|
| 104 |
+
|
| 105 |
def test_searchable_text_excludes_refresh_readme_body_for_stable_reuse() -> None:
|
| 106 |
project = Project(
|
| 107 |
id="build-small-hackathon/long-readme",
|