Spaces:
Running on Zero
Running on Zero
fix: speed up refresh embeddings
Browse files
Sync GitHub commit 01f6182: reduce the retrieval embedding text bounds so that a Space refresh can rebuild the live atlas promptly.
- data/project_index.json +0 -0
- hackathon_advisor/data.py +4 -3
- tests/test_data.py +1 -1
data/project_index.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
hackathon_advisor/data.py
CHANGED
|
@@ -31,8 +31,8 @@ INDEX_ALGORITHM = "llama-cpp-embedding-v1"
|
|
| 31 |
DEFAULT_EMBEDDING_MODEL_REPO = "ggml-org/embeddinggemma-300m-qat-q8_0-GGUF"
|
| 32 |
DEFAULT_EMBEDDING_MODEL_FILE = "embeddinggemma-300m-qat-Q8_0.gguf"
|
| 33 |
DEFAULT_EMBEDDING_RUNTIME = "llama.cpp via llama-cpp-python"
|
| 34 |
-
APP_FILE_EMBEDDING_CHAR_LIMIT =
|
| 35 |
-
README_EMBEDDING_CHAR_LIMIT =
|
| 36 |
|
| 37 |
|
| 38 |
EmbeddingFunction = Callable[[str], Sequence[float]]
|
|
@@ -99,7 +99,8 @@ class Project:
|
|
| 99 |
f"models: {' '.join(self.models)}",
|
| 100 |
f"datasets: {' '.join(self.datasets)}",
|
| 101 |
f"main app file: {self.app_file}" if self.app_file else "",
|
| 102 |
-
|
|
|
|
| 103 |
if self.app_file_embedding_text
|
| 104 |
else "",
|
| 105 |
]
|
|
|
|
| 31 |
DEFAULT_EMBEDDING_MODEL_REPO = "ggml-org/embeddinggemma-300m-qat-q8_0-GGUF"
|
| 32 |
DEFAULT_EMBEDDING_MODEL_FILE = "embeddinggemma-300m-qat-Q8_0.gguf"
|
| 33 |
DEFAULT_EMBEDDING_RUNTIME = "llama.cpp via llama-cpp-python"
|
| 34 |
+
APP_FILE_EMBEDDING_CHAR_LIMIT = 2000
|
| 35 |
+
README_EMBEDDING_CHAR_LIMIT = 1200
|
| 36 |
|
| 37 |
|
| 38 |
EmbeddingFunction = Callable[[str], Sequence[float]]
|
|
|
|
| 99 |
f"models: {' '.join(self.models)}",
|
| 100 |
f"datasets: {' '.join(self.datasets)}",
|
| 101 |
f"main app file: {self.app_file}" if self.app_file else "",
|
| 102 |
+
"main app file content:\n"
|
| 103 |
+
f"{bounded_embedding_text(self.app_file_embedding_text, APP_FILE_EMBEDDING_CHAR_LIMIT)}"
|
| 104 |
if self.app_file_embedding_text
|
| 105 |
else "",
|
| 106 |
]
|
tests/test_data.py
CHANGED
|
@@ -106,7 +106,7 @@ def test_searchable_text_bounds_readme_body_for_embedding() -> None:
|
|
| 106 |
searchable = project.searchable_text
|
| 107 |
|
| 108 |
assert "middle should not be embedded" not in searchable
|
| 109 |
-
assert len(searchable) <
|
| 110 |
|
| 111 |
|
| 112 |
def test_project_index_rejects_mismatched_snapshot(tmp_path: Path) -> None:
|
|
|
|
| 106 |
searchable = project.searchable_text
|
| 107 |
|
| 108 |
assert "middle should not be embedded" not in searchable
|
| 109 |
+
assert len(searchable) < 1600
|
| 110 |
|
| 111 |
|
| 112 |
def test_project_index_rejects_mismatched_snapshot(tmp_path: Path) -> None:
|