# hackathon-advisor/tests/test_data.py
# Author avatar: JacobLinCool's picture
# Last commit: "fix: ignore hosting region tags"
# Commit 04ad98e (verified)
from pathlib import Path
from tests.helpers import load_test_index
import json
from hackathon_advisor.data import (
Project,
ProjectIndex,
public_project_summary,
public_project_title,
)
def test_project_index_searches_snapshot() -> None:
    """Search the test index and sanity-check the first hit.

    The query must return at least one hit; the first hit's project id must
    start with ``build-small-hackathon/`` and its page number must be >= 1.
    The index must also report the ``llama-cpp-embedding-v1`` algorithm.
    """
    project_index = load_test_index()
    results = project_index.search("lullaby children audio", limit=3)

    # An empty result would make the indexing below fail with a less
    # helpful error, so check for it first.
    assert results
    top_hit = results[0]
    assert top_hit.project.id.startswith("build-small-hackathon/")
    assert top_hit.page_number >= 1
    assert project_index.index_algorithm == "llama-cpp-embedding-v1"
def test_project_index_whitespace() -> None:
    """``find_whitespace(limit=3)`` yields three items, each with a truthy label."""
    whitespace_items = load_test_index().find_whitespace(limit=3)

    assert len(whitespace_items) == 3
    for entry in whitespace_items:
        assert entry.label
def test_public_project_cards_hide_generic_submission_copy() -> None:
    """Placeholder titles and summaries are replaced in public-facing output.

    Covers both the standalone helpers (``public_project_title`` /
    ``public_project_summary``) and ``Project.to_public_dict``.
    """
    assert public_project_title("My Build Small Hackathon") == "Untitled project"

    # (raw summary, expected public summary)
    summary_cases = (
        ("This is my submission for the build-small-hackathon", ""),
        ("Todo", ""),
        (
            "Local-first personal knowledge agent",
            "Local-first personal knowledge agent",
        ),
    )
    for raw_summary, expected_summary in summary_cases:
        assert public_project_summary(raw_summary) == expected_summary

    placeholder_project = Project(
        id="build-small-hackathon/my-build-small-hackathon",
        title="My Build Small Hackathon",
        summary="This is my submission for the build-small-hackathon",
        tags=(),
        models=(),
        datasets=(),
        likes=0,
        sdk="gradio",
        license="",
        created_at="",
        last_modified="",
        host="",
        url="https://example.test",
    )
    card = placeholder_project.to_public_dict()
    assert card["title"] == "Untitled project"
    assert card["summary"] == ""
def test_searchable_text_includes_main_app_file_signals() -> None:
    """``searchable_text`` mentions the app file name and its embedding text."""
    canvas_project = Project(
        id="build-small-hackathon/idea-canvas",
        title="Idea Canvas",
        summary="",
        tags=("gradio",),
        models=(),
        datasets=(),
        likes=0,
        sdk="gradio",
        license="",
        created_at="",
        last_modified="",
        host="",
        url="https://example.test",
        app_file="app.py",
        app_file_embedding_text="score_idea\ngr.Textbox\nProject idea",
    )
    text = canvas_project.searchable_text

    expected_fragments = (
        "main app file: app.py",
        "score_idea",
        "Project idea",
    )
    for fragment in expected_fragments:
        assert fragment in text
def test_public_project_tags_exclude_hosting_metadata() -> None:
    """Raw tags are kept on the project; the public dict drops some of them.

    ``Project.tags`` must equal the input tags verbatim, while
    ``to_public_dict()["tags"]`` must contain neither the ``region:*``
    entries nor the repeated ``gradio`` entry.
    """
    raw_tags = ("gradio", "region:us", "local-first", "region:eu", "gradio")
    record = {
        "id": "build-small-hackathon/idea-canvas",
        "title": "Idea Canvas",
        "summary": "",
        "tags": list(raw_tags),
        "models": [],
        "datasets": [],
        "url": "https://example.test",
    }
    project = Project.from_dict(record)

    assert project.tags == raw_tags
    public_tags = project.to_public_dict()["tags"]
    assert public_tags == ["gradio", "local-first"]
def test_searchable_text_excludes_refresh_readme_body_for_stable_reuse() -> None:
    """``searchable_text`` must not contain the README body or a ``readme:`` label."""
    marker = "middle should not be embedded"
    # Marker sits between two 2500-character runs of filler.
    long_readme = "".join(("a" * 2500, marker, "b" * 2500))

    readme_project = Project(
        id="build-small-hackathon/long-readme",
        title="Long README",
        summary="",
        tags=(),
        models=(),
        datasets=(),
        likes=0,
        sdk="gradio",
        license="",
        created_at="",
        last_modified="",
        host="",
        url="https://example.test",
        readme_body=long_readme,
    )
    text = readme_project.searchable_text

    for forbidden in ("readme:", marker):
        assert forbidden not in text
def test_project_index_rejects_mismatched_snapshot(tmp_path: Path) -> None:
    """Loading an index whose ``snapshot_generated_at`` was altered raises ``ValueError``.

    A copy of ``data/project_index.json`` with a rewritten timestamp is
    written under ``tmp_path`` and loaded alongside the unmodified
    ``data/projects.json``; the error message must mention
    "different snapshot timestamp".
    """
    source_index = Path("data/project_index.json")
    tampered = json.loads(source_index.read_text(encoding="utf-8"))
    tampered["snapshot_generated_at"] = "2000-01-01T00:00:00+00:00"

    tampered_path = tmp_path / "project_index.json"
    tampered_path.write_text(json.dumps(tampered), encoding="utf-8")

    rejection_message = None
    try:
        ProjectIndex.from_files(Path("data/projects.json"), tampered_path)
    except ValueError as error:
        rejection_message = str(error)

    # No ValueError at all means the mismatch went undetected.
    if rejection_message is None:
        raise AssertionError("mismatched index should be rejected")
    assert "different snapshot timestamp" in rejection_message
def test_project_index_retains_validated_payload() -> None:
    """A loaded index exposes its payload with a matching digest and document count.

    ``index_payload["snapshot_digest"]`` must equal the digest stored in
    ``data/project_index.json``, and the payload must list one document per
    loaded project.
    """
    projects_path = Path("data/projects.json")
    index_path = Path("data/project_index.json")

    on_disk = json.loads(index_path.read_text(encoding="utf-8"))
    loaded = ProjectIndex.from_files(projects_path, index_path)

    retained = loaded.index_payload
    assert retained["snapshot_digest"] == on_disk["snapshot_digest"]
    assert len(retained["documents"]) == len(loaded.projects)