Spaces:
Running on Zero
Running on Zero
fix: refresh from latest project snapshot
Browse files
Sync GitHub commit ad338a5; ship the validated 148-project snapshot/index so deployed refresh reuses existing embeddings.
- data/project_index.json +0 -0
- data/projects.json +0 -0
- hackathon_advisor/data.py +0 -4
- tests/test_data.py +2 -2
data/project_index.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/projects.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
hackathon_advisor/data.py
CHANGED
|
@@ -32,7 +32,6 @@ DEFAULT_EMBEDDING_MODEL_REPO = "ggml-org/embeddinggemma-300m-qat-q8_0-GGUF"
|
|
| 32 |
DEFAULT_EMBEDDING_MODEL_FILE = "embeddinggemma-300m-qat-Q8_0.gguf"
|
| 33 |
DEFAULT_EMBEDDING_RUNTIME = "llama.cpp via llama-cpp-python"
|
| 34 |
APP_FILE_EMBEDDING_CHAR_LIMIT = 2000
|
| 35 |
-
README_EMBEDDING_CHAR_LIMIT = 1200
|
| 36 |
|
| 37 |
|
| 38 |
EmbeddingFunction = Callable[[str], Sequence[float]]
|
|
@@ -92,9 +91,6 @@ class Project:
|
|
| 92 |
f"title: {self.title}",
|
| 93 |
f"slug: {self.slug.replace('-', ' ').replace('_', ' ')}",
|
| 94 |
f"summary: {self.summary}",
|
| 95 |
-
f"readme:\n{bounded_embedding_text(self.readme_body, README_EMBEDDING_CHAR_LIMIT)}"
|
| 96 |
-
if self.readme_body
|
| 97 |
-
else "",
|
| 98 |
f"tags: {' '.join(self.tags)}",
|
| 99 |
f"models: {' '.join(self.models)}",
|
| 100 |
f"datasets: {' '.join(self.datasets)}",
|
|
|
|
| 32 |
DEFAULT_EMBEDDING_MODEL_FILE = "embeddinggemma-300m-qat-Q8_0.gguf"
|
| 33 |
DEFAULT_EMBEDDING_RUNTIME = "llama.cpp via llama-cpp-python"
|
| 34 |
APP_FILE_EMBEDDING_CHAR_LIMIT = 2000
|
|
|
|
| 35 |
|
| 36 |
|
| 37 |
EmbeddingFunction = Callable[[str], Sequence[float]]
|
|
|
|
| 91 |
f"title: {self.title}",
|
| 92 |
f"slug: {self.slug.replace('-', ' ').replace('_', ' ')}",
|
| 93 |
f"summary: {self.summary}",
|
|
|
|
|
|
|
|
|
|
| 94 |
f"tags: {' '.join(self.tags)}",
|
| 95 |
f"models: {' '.join(self.models)}",
|
| 96 |
f"datasets: {' '.join(self.datasets)}",
|
tests/test_data.py
CHANGED
|
@@ -85,7 +85,7 @@ def test_searchable_text_includes_main_app_file_signals() -> None:
|
|
| 85 |
assert "Project idea" in searchable
|
| 86 |
|
| 87 |
|
| 88 |
-
def test_searchable_text_bounds_readme_body_for_embedding() -> None:
|
| 89 |
project = Project(
|
| 90 |
id="build-small-hackathon/long-readme",
|
| 91 |
title="Long README",
|
|
@@ -105,8 +105,8 @@ def test_searchable_text_bounds_readme_body_for_embedding() -> None:
|
|
| 105 |
|
| 106 |
searchable = project.searchable_text
|
| 107 |
|
|
|
|
| 108 |
assert "middle should not be embedded" not in searchable
|
| 109 |
-
assert len(searchable) < 1600
|
| 110 |
|
| 111 |
|
| 112 |
def test_project_index_rejects_mismatched_snapshot(tmp_path: Path) -> None:
|
|
|
|
| 85 |
assert "Project idea" in searchable
|
| 86 |
|
| 87 |
|
| 88 |
+
def test_searchable_text_excludes_refresh_readme_body_for_stable_reuse() -> None:
|
| 89 |
project = Project(
|
| 90 |
id="build-small-hackathon/long-readme",
|
| 91 |
title="Long README",
|
|
|
|
| 105 |
|
| 106 |
searchable = project.searchable_text
|
| 107 |
|
| 108 |
+
assert "readme:" not in searchable
|
| 109 |
assert "middle should not be embedded" not in searchable
|
|
|
|
| 110 |
|
| 111 |
|
| 112 |
def test_project_index_rejects_mismatched_snapshot(tmp_path: Path) -> None:
|