Spaces:
Runtime error
Runtime error
Update prepare_vector_dp.py
Browse files- prepare_vector_dp.py +19 -2
prepare_vector_dp.py
CHANGED
|
@@ -1,10 +1,12 @@
|
|
| 1 |
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
|
| 2 |
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
|
| 3 |
from langchain_community.vectorstores import FAISS
|
| 4 |
-
from langchain_community.embeddings import GPT4AllEmbeddings
|
|
|
|
| 5 |
|
| 6 |
from huggingface_hub import hf_hub_download
|
| 7 |
|
|
|
|
| 8 |
# from llama_cpp import Llama
|
| 9 |
import os
|
| 10 |
|
|
@@ -42,7 +44,22 @@ def create_db_from_text():
|
|
| 42 |
chunks = text_splitter.split_text(raw_text)
|
| 43 |
|
| 44 |
# Embedding
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
embedding_model = GPT4AllEmbeddings(model_file= model_file)
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
# Load the chunks into the FAISS vector DB
|
| 48 |
db = FAISS.from_texts(texts=chunks, embedding=embedding_model)
|
|
@@ -59,7 +76,7 @@ def create_dp_from_files():
|
|
| 59 |
text_splitter = CharacterTextSplitter(chunk_size = 512, chunk_overlap = 50)
|
| 60 |
chunks = text_splitter.split_documents(documents)
|
| 61 |
|
| 62 |
-
embedding_model =
|
| 63 |
dp = FAISS.from_documents(chunks, embedding_model)
|
| 64 |
dp.save_local(vector_dp_path)
|
| 65 |
return dp
|
|
|
|
| 1 |
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
|
| 2 |
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
|
| 3 |
from langchain_community.vectorstores import FAISS
|
| 4 |
+
#from langchain_community.embeddings import GPT4AllEmbeddings
|
| 5 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 6 |
|
| 7 |
from huggingface_hub import hf_hub_download
|
| 8 |
|
| 9 |
+
|
| 10 |
# from llama_cpp import Llama
|
| 11 |
import os
|
| 12 |
|
|
|
|
| 44 |
chunks = text_splitter.split_text(raw_text)
|
| 45 |
|
| 46 |
# Embedding
|
| 47 |
+
'''
|
| 48 |
+
🔥 The gpt4all embedding library you’re using was compiled against GLIBC 2.32 or higher,
|
| 49 |
+
but the Hugging Face Docker environment only provides GLIBC 2.31 or lower.
|
| 50 |
+
|
| 51 |
+
So your Space crashes because it tries to load a C-based .so library that depends on a newer system-level runtime.
|
| 52 |
+
|
| 53 |
+
🧠 What is GLIBC?
|
| 54 |
+
GLIBC is the GNU C standard library — it’s a low-level part of Linux.
|
| 55 |
+
Most .so libraries (like libllmodel.so) built from C++ depend on a minimum GLIBC version.
|
| 56 |
+
|
| 57 |
+
You cannot change GLIBC in Hugging Face Docker — so if your library requires GLIBC 2.32+, it will not run.
|
| 58 |
+
|
| 59 |
embedding_model = GPT4AllEmbeddings(model_file= model_file)
|
| 60 |
+
'''
|
| 61 |
+
|
| 62 |
+
embedding_model = HuggingFaceEmbeddings(model_file= model_file)
|
| 63 |
|
| 64 |
# Load the chunks into the FAISS vector DB
|
| 65 |
db = FAISS.from_texts(texts=chunks, embedding=embedding_model)
|
|
|
|
| 76 |
text_splitter = CharacterTextSplitter(chunk_size = 512, chunk_overlap = 50)
|
| 77 |
chunks = text_splitter.split_documents(documents)
|
| 78 |
|
| 79 |
+
embedding_model = HuggingFaceEmbeddings(model_file = model_file)
|
| 80 |
dp = FAISS.from_documents(chunks, embedding_model)
|
| 81 |
dp.save_local(vector_dp_path)
|
| 82 |
return dp
|