Spaces:
Running on Zero
Running on Zero
File size: 4,603 Bytes
from pathlib import Path
from tests.helpers import load_test_index
import json
from hackathon_advisor.data import (
Project,
ProjectIndex,
public_project_summary,
public_project_title,
)
def test_project_index_searches_snapshot() -> None:
    """Search the test index and sanity-check the best-ranked hit.

    The query must return at least one result; the first result must belong
    to the ``build-small-hackathon/`` namespace and carry a page number of
    at least 1. The index must also report the expected algorithm id.
    """
    project_index = load_test_index()
    results = project_index.search("lullaby children audio", limit=3)

    # An empty result set would make the indexing below raise, so check first.
    assert results
    best = results[0]
    assert best.project.id.startswith("build-small-hackathon/")
    assert best.page_number >= 1
    assert project_index.index_algorithm == "llama-cpp-embedding-v1"
def test_project_index_whitespace() -> None:
    """Request three whitespace entries and check each has a truthy label."""
    project_index = load_test_index()
    gaps = project_index.find_whitespace(limit=3)

    assert len(gaps) == 3
    # Collect offenders so a failure shows which entries lack a label.
    unlabeled = [gap for gap in gaps if not gap.label]
    assert not unlabeled
def test_public_project_cards_hide_generic_submission_copy() -> None:
    """Check that placeholder titles and summaries are scrubbed for display.

    Covers both the standalone helpers (``public_project_title`` and
    ``public_project_summary``) and ``Project.to_public_dict``: a generic
    title becomes ``"Untitled project"``, generic summaries become empty,
    and a descriptive summary passes through unchanged.
    """
    generic_title = "My Build Small Hackathon"
    generic_summary = "This is my submission for the build-small-hackathon"

    assert public_project_title(generic_title) == "Untitled project"

    # (raw summary, expected public summary)
    summary_cases = (
        (generic_summary, ""),
        ("Todo", ""),
        (
            "Local-first personal knowledge agent",
            "Local-first personal knowledge agent",
        ),
    )
    for raw_summary, expected in summary_cases:
        assert public_project_summary(raw_summary) == expected

    placeholder_project = Project(
        id="build-small-hackathon/my-build-small-hackathon",
        title=generic_title,
        summary=generic_summary,
        tags=(),
        models=(),
        datasets=(),
        likes=0,
        sdk="gradio",
        license="",
        created_at="",
        last_modified="",
        host="",
        url="https://example.test",
    )
    card = placeholder_project.to_public_dict()
    assert card["title"] == "Untitled project"
    assert card["summary"] == ""
def test_searchable_text_includes_main_app_file_signals() -> None:
    """Check that ``searchable_text`` reflects the app file and its text.

    A project built with ``app_file`` and ``app_file_embedding_text`` must
    expose the app file name line and fragments of the embedding text in
    its searchable text.
    """
    canvas = Project(
        id="build-small-hackathon/idea-canvas",
        title="Idea Canvas",
        summary="",
        tags=("gradio",),
        models=(),
        datasets=(),
        likes=0,
        sdk="gradio",
        license="",
        created_at="",
        last_modified="",
        host="",
        url="https://example.test",
        app_file="app.py",
        app_file_embedding_text="score_idea\ngr.Textbox\nProject idea",
    )
    text = canvas.searchable_text

    expected_fragments = ("main app file: app.py", "score_idea", "Project idea")
    for fragment in expected_fragments:
        assert fragment in text
def test_public_project_tags_exclude_hosting_metadata() -> None:
    """Check that public tags drop ``region:*`` entries and duplicates.

    The ``Project`` built by ``from_dict`` must keep the raw tags verbatim
    as a tuple, while ``to_public_dict`` exposes only the de-duplicated,
    non-region tags in their original order.
    """
    raw_tags = ("gradio", "region:us", "local-first", "region:eu", "gradio")
    record = {
        "id": "build-small-hackathon/idea-canvas",
        "title": "Idea Canvas",
        "summary": "",
        # from_dict receives a list, as in the serialized form.
        "tags": list(raw_tags),
        "models": [],
        "datasets": [],
        "url": "https://example.test",
    }
    canvas = Project.from_dict(record)

    assert canvas.tags == raw_tags
    public_view = canvas.to_public_dict()
    assert public_view["tags"] == ["gradio", "local-first"]
def test_searchable_text_excludes_refresh_readme_body_for_stable_reuse() -> None:
    """Check that ``readme_body`` content is absent from ``searchable_text``.

    The README body is a marker phrase padded by 2500 characters on each
    side; neither a ``readme:`` prefix nor the marker may show up in the
    searchable text.
    """
    marker = "middle should not be embedded"
    long_body = "".join(("a" * 2500, marker, "b" * 2500))

    readme_project = Project(
        id="build-small-hackathon/long-readme",
        title="Long README",
        summary="",
        tags=(),
        models=(),
        datasets=(),
        likes=0,
        sdk="gradio",
        license="",
        created_at="",
        last_modified="",
        host="",
        url="https://example.test",
        readme_body=long_body,
    )
    text = readme_project.searchable_text

    for forbidden in ("readme:", marker):
        assert forbidden not in text
def test_project_index_rejects_mismatched_snapshot(tmp_path: Path) -> None:
    """Check that an index with an altered snapshot timestamp is refused.

    A copy of ``data/project_index.json`` with ``snapshot_generated_at``
    overwritten is written under ``tmp_path``; loading it alongside
    ``data/projects.json`` must raise ``ValueError`` mentioning the
    differing snapshot timestamp.
    """
    source_text = Path("data/project_index.json").read_text(encoding="utf-8")
    tampered = json.loads(source_text)
    tampered["snapshot_generated_at"] = "2000-01-01T00:00:00+00:00"

    tampered_path = tmp_path / "project_index.json"
    tampered_path.write_text(json.dumps(tampered), encoding="utf-8")

    # Keep the exception under a separate name: the ``as`` target is
    # unbound once the except clause finishes.
    rejection = None
    try:
        ProjectIndex.from_files(Path("data/projects.json"), tampered_path)
    except ValueError as error:
        rejection = error

    if rejection is None:
        raise AssertionError("mismatched index should be rejected")
    assert "different snapshot timestamp" in str(rejection)
def test_project_index_retains_validated_payload() -> None:
    """Check that a loaded index keeps the payload it was built from.

    The retained ``index_payload`` must carry the same ``snapshot_digest``
    as the JSON file on disk and one document per loaded project.
    """
    index_path = Path("data/project_index.json")
    on_disk = json.loads(index_path.read_text(encoding="utf-8"))

    loaded = ProjectIndex.from_files(Path("data/projects.json"), index_path)
    retained = loaded.index_payload

    assert retained["snapshot_digest"] == on_disk["snapshot_digest"]
    assert len(retained["documents"]) == len(loaded.projects)
|