JacobLinCool committed on
Commit
742999b
·
verified ·
1 Parent(s): c9f8f52

fix: refresh from latest project snapshot

Browse files

Sync GitHub commit ad338a5; ship the validated 148-project snapshot/index so deployed refresh reuses existing embeddings.

data/project_index.json CHANGED
The diff for this file is too large to render. See raw diff
 
data/projects.json CHANGED
The diff for this file is too large to render. See raw diff
 
hackathon_advisor/data.py CHANGED
@@ -32,7 +32,6 @@ DEFAULT_EMBEDDING_MODEL_REPO = "ggml-org/embeddinggemma-300m-qat-q8_0-GGUF"
32
  DEFAULT_EMBEDDING_MODEL_FILE = "embeddinggemma-300m-qat-Q8_0.gguf"
33
  DEFAULT_EMBEDDING_RUNTIME = "llama.cpp via llama-cpp-python"
34
  APP_FILE_EMBEDDING_CHAR_LIMIT = 2000
35
- README_EMBEDDING_CHAR_LIMIT = 1200
36
 
37
 
38
  EmbeddingFunction = Callable[[str], Sequence[float]]
@@ -92,9 +91,6 @@ class Project:
92
  f"title: {self.title}",
93
  f"slug: {self.slug.replace('-', ' ').replace('_', ' ')}",
94
  f"summary: {self.summary}",
95
- f"readme:\n{bounded_embedding_text(self.readme_body, README_EMBEDDING_CHAR_LIMIT)}"
96
- if self.readme_body
97
- else "",
98
  f"tags: {' '.join(self.tags)}",
99
  f"models: {' '.join(self.models)}",
100
  f"datasets: {' '.join(self.datasets)}",
 
32
  DEFAULT_EMBEDDING_MODEL_FILE = "embeddinggemma-300m-qat-Q8_0.gguf"
33
  DEFAULT_EMBEDDING_RUNTIME = "llama.cpp via llama-cpp-python"
34
  APP_FILE_EMBEDDING_CHAR_LIMIT = 2000
 
35
 
36
 
37
  EmbeddingFunction = Callable[[str], Sequence[float]]
 
91
  f"title: {self.title}",
92
  f"slug: {self.slug.replace('-', ' ').replace('_', ' ')}",
93
  f"summary: {self.summary}",
 
 
 
94
  f"tags: {' '.join(self.tags)}",
95
  f"models: {' '.join(self.models)}",
96
  f"datasets: {' '.join(self.datasets)}",
tests/test_data.py CHANGED
@@ -85,7 +85,7 @@ def test_searchable_text_includes_main_app_file_signals() -> None:
85
  assert "Project idea" in searchable
86
 
87
 
88
- def test_searchable_text_bounds_readme_body_for_embedding() -> None:
89
  project = Project(
90
  id="build-small-hackathon/long-readme",
91
  title="Long README",
@@ -105,8 +105,8 @@ def test_searchable_text_bounds_readme_body_for_embedding() -> None:
105
 
106
  searchable = project.searchable_text
107
 
 
108
  assert "middle should not be embedded" not in searchable
109
- assert len(searchable) < 1600
110
 
111
 
112
  def test_project_index_rejects_mismatched_snapshot(tmp_path: Path) -> None:
 
85
  assert "Project idea" in searchable
86
 
87
 
88
+ def test_searchable_text_excludes_refresh_readme_body_for_stable_reuse() -> None:
89
  project = Project(
90
  id="build-small-hackathon/long-readme",
91
  title="Long README",
 
105
 
106
  searchable = project.searchable_text
107
 
108
+ assert "readme:" not in searchable
109
  assert "middle should not be embedded" not in searchable
 
110
 
111
 
112
  def test_project_index_rejects_mismatched_snapshot(tmp_path: Path) -> None: