Spaces:
Sleeping
Sleeping
Update app/vector_store.py
Browse files- app/vector_store.py +8 -0
app/vector_store.py
CHANGED
|
@@ -34,6 +34,14 @@ def store_pdf_image(pdf_path):
|
|
| 34 |
Chroma.from_documents(chunks, embeddings, persist_directory=CHROMA_IMG_DIR)
|
| 35 |
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
# images = convert_from_path("your_file.pdf", poppler_path="/opt/homebrew/bin")
|
| 38 |
|
| 39 |
|
|
|
|
| 34 |
Chroma.from_documents(chunks, embeddings, persist_directory=CHROMA_IMG_DIR)
|
| 35 |
|
| 36 |
|
| 37 |
+
def store_pdf_image_text(text):
    """Split *text* into chunks, embed them, and persist them with Chroma.

    The text is wrapped in a single ``Document``, split with a
    ``RecursiveCharacterTextSplitter`` (``chunk_size=500``,
    ``chunk_overlap=50``), embedded with the ``thenlper/gte-large``
    sentence-transformer model, and handed to ``Chroma.from_documents`` with
    ``persist_directory=CHROMA_IMG_DIR``.

    Args:
        text: The text to index. Presumably text extracted from the images
            of a PDF (judging by the function name) -- confirm against the
            caller.

    Returns:
        None. ``None``, empty, or whitespace-only input is a no-op.
    """
    # Blank input has nothing to index: return before paying for the
    # embedding-model load or touching the persisted store.
    if not text or not text.strip():
        return

    doc = Document(page_content=text)
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents([doc])
    embeddings = SentenceTransformerEmbeddings(model_name='thenlper/gte-large')
    Chroma.from_documents(chunks, embeddings, persist_directory=CHROMA_IMG_DIR)
|
| 43 |
+
|
| 44 |
+
|
| 45 |
# images = convert_from_path("your_file.pdf", poppler_path="/opt/homebrew/bin")
|
| 46 |
|
| 47 |
|