Spaces:
Sleeping
Sleeping
Commit
·
8ec62ff
1
Parent(s):
66d0fd5
Updated
Browse files- smebuilder_vector.py +4 -3
smebuilder_vector.py
CHANGED
|
@@ -11,6 +11,7 @@ COLLECTION_NAME = "landing_page_generation_examples"
|
|
| 11 |
EMBEDDING_MODEL = os.getenv("HF_EMBEDDING_MODEL", "intfloat/e5-large-v2")
|
| 12 |
HF_CACHE_DIR = os.getenv("HF_CACHE_DIR", "/app/huggingface_cache")
|
| 13 |
|
|
|
|
| 14 |
os.makedirs(HF_CACHE_DIR, exist_ok=True)
|
| 15 |
os.makedirs(DB_LOCATION, exist_ok=True)
|
| 16 |
|
|
@@ -46,12 +47,12 @@ if add_documents:
|
|
| 46 |
str(row.get("js_code", "")),
|
| 47 |
str(row.get("sector", ""))
|
| 48 |
]).strip()
|
| 49 |
-
documents.append(Document(page_content=content, id
|
| 50 |
|
| 51 |
if documents:
|
| 52 |
-
vector_store.add_documents(documents=documents
|
| 53 |
|
| 54 |
# ----------------- RETRIEVER -----------------
|
| 55 |
retriever = vector_store.as_retriever(search_kwargs={"k": 20})
|
| 56 |
|
| 57 |
-
print(f"Vector store ready with {
|
|
|
|
| 11 |
EMBEDDING_MODEL = os.getenv("HF_EMBEDDING_MODEL", "intfloat/e5-large-v2")
|
| 12 |
HF_CACHE_DIR = os.getenv("HF_CACHE_DIR", "/app/huggingface_cache")
|
| 13 |
|
| 14 |
+
|
| 15 |
os.makedirs(HF_CACHE_DIR, exist_ok=True)
|
| 16 |
os.makedirs(DB_LOCATION, exist_ok=True)
|
| 17 |
|
|
|
|
| 47 |
str(row.get("js_code", "")),
|
| 48 |
str(row.get("sector", ""))
|
| 49 |
]).strip()
|
| 50 |
+
documents.append(Document(page_content=content, metadata={"id": str(i)}))
|
| 51 |
|
| 52 |
if documents:
|
| 53 |
+
vector_store.add_documents(documents=documents)
|
| 54 |
|
| 55 |
# ----------------- RETRIEVER -----------------
|
| 56 |
retriever = vector_store.as_retriever(search_kwargs={"k": 20})
|
| 57 |
|
| 58 |
+
print(f"Vector store ready with {vector_store._collection.count()} documents.")
|