Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -282,58 +282,17 @@ def split_into_chunks(texts, references, chunk_size, chunk_overlap):
|
|
| 282 |
return chunks
|
| 283 |
|
| 284 |
# Setup Vectorstore
|
| 285 |
-
#def setup_vectorstore(chunks, model_name):
|
| 286 |
-
# print("Start setup_vectorstore_function")
|
| 287 |
-
# embedding_model = HuggingFaceEmbeddings(model_name=model_name)
|
| 288 |
-
|
| 289 |
-
# vectorstore = Chroma.from_documents(chunks, embedding=embedding_model, persist_directory=persist_directory)
|
| 290 |
-
# vectorstore.persist()
|
| 291 |
-
# print("test1", vectorstore._persist_directory)
|
| 292 |
-
# print("test2",vectorstore.__dir__)
|
| 293 |
-
# return vectorstore
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
def setup_vectorstore(chunks, model_name):
|
| 298 |
print("Start setup_vectorstore_function")
|
|
|
|
| 299 |
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
# Set up the vectorstore with the temporary directory
|
| 308 |
-
vectorstore = Chroma.from_documents(chunks, embedding=embedding_model, persist_directory=temp_dir)
|
| 309 |
-
vectorstore.persist()
|
| 310 |
-
|
| 311 |
-
# Optionally, display the persist directory for debugging
|
| 312 |
-
print("Persist directory:", vectorstore._persist_directory)
|
| 313 |
-
print("Available methods in vectorstore:", dir(vectorstore))
|
| 314 |
-
|
| 315 |
-
# Zip the entire folder
|
| 316 |
-
zip_file_path = os.path.join(temp_dir, "folder_archive.zip")
|
| 317 |
-
with zipfile.ZipFile(zip_file_path, "w", zipfile.ZIP_DEFLATED) as zipf:
|
| 318 |
-
for folder_root, _, folder_files in os.walk(temp_dir):
|
| 319 |
-
for file_name in folder_files:
|
| 320 |
-
file_path = os.path.join(folder_root, file_name)
|
| 321 |
-
arcname = os.path.relpath(file_path, temp_dir) # Preserve relative paths
|
| 322 |
-
zipf.write(file_path, arcname)
|
| 323 |
-
|
| 324 |
-
print(f"Created zip file: {zip_file_path}")
|
| 325 |
-
|
| 326 |
-
# Upload the zip file
|
| 327 |
-
target_path_in_repo = "folder_archive.zip" # Define the target name in the repository
|
| 328 |
-
api.upload_file(
|
| 329 |
-
path_or_fileobj=zip_file_path,
|
| 330 |
-
path_in_repo=target_path_in_repo,
|
| 331 |
-
repo_id=HF_SPACE_NAME,
|
| 332 |
-
repo_type="space"
|
| 333 |
-
)
|
| 334 |
-
print(f"Uploaded {zip_file_path} as {target_path_in_repo}")
|
| 335 |
-
|
| 336 |
-
print("Process completed successfully!")
|
| 337 |
|
| 338 |
|
| 339 |
|
|
|
|
| 282 |
return chunks
|
| 283 |
|
| 284 |
# Setup Vectorstore
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
def setup_vectorstore(chunks, model_name):
    """Embed ``chunks`` and store them in a Chroma vectorstore.

    Args:
        chunks: Documents handed unchanged to ``Chroma.from_documents``
            (presumably the output of ``split_into_chunks`` -- confirm
            against the caller).
        model_name: Model identifier passed to ``HuggingFaceEmbeddings``.

    Returns:
        The vectorstore object returned by ``Chroma.from_documents``.

    Note:
        ``persist_directory`` is not a parameter; it is read from the
        enclosing (module-level) scope and must be defined before this
        function is called.
    """
    print("Start setup_vectorstore_function")
    embedding_model = HuggingFaceEmbeddings(model_name=model_name)

    vectorstore = Chroma.from_documents(
        chunks,
        embedding=embedding_model,
        persist_directory=persist_directory,
    )

    # NOTE(review): newer Chroma wrappers reportedly persist automatically
    # and may no longer expose ``persist()`` -- confirm for the pinned
    # version. Guarding the call keeps the old behaviour where the method
    # exists and avoids an AttributeError where it does not.
    persist = getattr(vectorstore, "persist", None)
    if callable(persist):
        persist()

    # Debug output. ``_persist_directory`` is a private attribute of the
    # vectorstore object, so this line is tied to the library's internals.
    print("test1", vectorstore._persist_directory)
    # ``dir(...)`` lists the attribute names; the bare ``__dir__`` attribute
    # only printed the bound-method repr.
    print("test2", dir(vectorstore))
    return vectorstore
|
| 294 |
+
|
| 295 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 296 |
|
| 297 |
|
| 298 |
|