Spaces:

build-small-hackathon
/

hackathon-advisor

Running on Zero

JacobLinCool committed on Jun 8

Commit

99bcb68

verified ·

1 Parent(s): f984c60

fix: speed up refresh embeddings

Sync GitHub commit 01f6182; reduce retrieval embedding text bounds so Space refresh can rebuild the live atlas promptly.

Files changed (3) hide show

data/project_index.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

hackathon_advisor/data.py CHANGED Viewed

@@ -31,8 +31,8 @@ INDEX_ALGORITHM = "llama-cpp-embedding-v1"
 DEFAULT_EMBEDDING_MODEL_REPO = "ggml-org/embeddinggemma-300m-qat-q8_0-GGUF"
 DEFAULT_EMBEDDING_MODEL_FILE = "embeddinggemma-300m-qat-Q8_0.gguf"
 DEFAULT_EMBEDDING_RUNTIME = "llama.cpp via llama-cpp-python"
-APP_FILE_EMBEDDING_CHAR_LIMIT = 8000
-README_EMBEDDING_CHAR_LIMIT = 4000
 EmbeddingFunction = Callable[[str], Sequence[float]]
@@ -99,7 +99,8 @@ class Project:
                 f"models: {' '.join(self.models)}",
                 f"datasets: {' '.join(self.datasets)}",
                 f"main app file: {self.app_file}" if self.app_file else "",
-                f"main app file content:\n{self.app_file_embedding_text}"
                 if self.app_file_embedding_text
                 else "",
             ]

 DEFAULT_EMBEDDING_MODEL_REPO = "ggml-org/embeddinggemma-300m-qat-q8_0-GGUF"
 DEFAULT_EMBEDDING_MODEL_FILE = "embeddinggemma-300m-qat-Q8_0.gguf"
 DEFAULT_EMBEDDING_RUNTIME = "llama.cpp via llama-cpp-python"
+APP_FILE_EMBEDDING_CHAR_LIMIT = 2000
+README_EMBEDDING_CHAR_LIMIT = 1200
 EmbeddingFunction = Callable[[str], Sequence[float]]
                 f"models: {' '.join(self.models)}",
                 f"datasets: {' '.join(self.datasets)}",
                 f"main app file: {self.app_file}" if self.app_file else "",
+                "main app file content:\n"
+                f"{bounded_embedding_text(self.app_file_embedding_text, APP_FILE_EMBEDDING_CHAR_LIMIT)}"
                 if self.app_file_embedding_text
                 else "",
             ]

tests/test_data.py CHANGED Viewed

@@ -106,7 +106,7 @@ def test_searchable_text_bounds_readme_body_for_embedding() -> None:
     searchable = project.searchable_text
     assert "middle should not be embedded" not in searchable
-    assert len(searchable) < 4300
 def test_project_index_rejects_mismatched_snapshot(tmp_path: Path) -> None:

     searchable = project.searchable_text
     assert "middle should not be embedded" not in searchable
+    assert len(searchable) < 1600
 def test_project_index_rejects_mismatched_snapshot(tmp_path: Path) -> None: